mr5 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
fc95128 verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.43027451634407043,
"max": 0.2986099123954773,
"mean": -0.0025507817044854164,
"std": 0.04255499690771103,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.06311193853616714,
"max": 0.10768741369247437,
"mean": 0.0006200151983648539,
"std": 0.03410356491804123,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.41268283128738403,
"max": 0.8365557193756104,
"mean": -0.00020680355373769999,
"std": 0.02410806156694889,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.11543754488229752,
"max": 0.3218643069267273,
"mean": -0.0009378742543049157,
"std": 0.019571715965867043,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.792863130569458,
"max": 2.8707633018493652,
"mean": -0.0003630426654126495,
"std": 0.6153795719146729,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.27924296259880066,
"max": 0.3817594349384308,
"mean": 0.00042336067417636514,
"std": 0.042748212814331055,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.22243480384349823,
"max": 0.20970797538757324,
"mean": -0.004494894295930862,
"std": 0.04093479365110397,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.4279620349407196,
"max": 0.47544437646865845,
"mean": 3.4269442039658315e-06,
"std": 0.024507490918040276,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.32538774609565735,
"max": 0.15757951140403748,
"mean": -0.046732865273952484,
"std": 0.05161404609680176,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.4103900194168091,
"max": 0.3545621335506439,
"mean": -0.0001282805751543492,
"std": 0.02359895221889019,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.22982768714427948,
"max": 0.2626851797103882,
"mean": -0.029157839715480804,
"std": 0.04937523230910301,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.2546607255935669,
"max": 0.8210369348526001,
"mean": 0.5255380868911743,
"std": 0.08102277666330338,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.296828031539917,
"max": 0.2656802833080292,
"mean": -0.0004245353629812598,
"std": 0.032100748270750046,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.09280094504356384,
"max": 0.12531320750713348,
"mean": 0.0006500966264866292,
"std": 0.025744492188096046,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.2905188202857971,
"max": 0.28166285157203674,
"mean": -7.521975203417242e-05,
"std": 0.030932102352380753,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.9063615798950195,
"max": 5.821039199829102,
"mean": -0.009349350817501545,
"std": 1.2963582277297974,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.4250166118144989,
"max": 0.34394335746765137,
"mean": 9.808164759306237e-05,
"std": 0.02995201013982296,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.02886926755309105,
"max": 0.027612265199422836,
"mean": -0.0003159886400680989,
"std": 0.012566552497446537,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.4542844891548157,
"max": 0.4484859108924866,
"mean": 2.2895628717378713e-05,
"std": 0.023853421211242676,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.08885892480611801,
"max": 0.09123405814170837,
"mean": 0.002273206366226077,
"std": 0.019519906491041183,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.26680853962898254,
"max": 1.0574053525924683,
"mean": 0.5312761068344116,
"std": 0.10467371344566345,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.5747479200363159,
"max": 0.6086151599884033,
"mean": -0.00043056829599663615,
"std": 0.03859534114599228,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.18226587772369385,
"max": 0.04570382833480835,
"mean": -0.029475372284650803,
"std": 0.04265210032463074,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.1669524908065796,
"max": 1.6345643997192383,
"mean": 0.00032027901033870876,
"std": 0.027692919597029686,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.16255705058574677,
"max": 0.20596350729465485,
"mean": -0.021122729405760765,
"std": 0.0279533751308918,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.2242354154586792,
"max": 0.8446622490882874,
"mean": 0.4876382350921631,
"std": 0.07536358386278152,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.25569042563438416,
"max": 0.3060862720012665,
"mean": -8.35508035379462e-06,
"std": 0.03346911817789078,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.09542153775691986,
"max": 0.11059843748807907,
"mean": 6.575271254405379e-05,
"std": 0.026967303827404976,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.29741746187210083,
"max": 0.2962968945503235,
"mean": 5.0992566684726626e-05,
"std": 0.03253895416855812,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.170334339141846,
"max": 5.090466022491455,
"mean": -0.014626836404204369,
"std": 1.1584166288375854,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.3447432518005371,
"max": 0.3434843122959137,
"mean": 7.888684194767848e-05,
"std": 0.030058253556489944,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.03619777783751488,
"max": 0.033210255205631256,
"mean": -0.00014313205610960722,
"std": 0.013021216727793217,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.31545811891555786,
"max": 0.3753635585308075,
"mean": -2.0908952137688175e-05,
"std": 0.024055080488324165,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.10554195195436478,
"max": 0.12217912822961807,
"mean": -0.001965724630281329,
"std": 0.02885899320244789,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.31185638904571533,
"max": 1.1226844787597656,
"mean": 0.6664173007011414,
"std": 0.09809636324644089,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.8724010586738586,
"max": 0.6276066303253174,
"mean": 0.0016756090335547924,
"std": 0.04743661358952522,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.27133694291114807,
"max": 0.034276124089956284,
"mean": -0.04661266878247261,
"std": 0.04062533751130104,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.9213826656341553,
"max": 0.9645106792449951,
"mean": 0.0010220588883385062,
"std": 0.040701646357774734,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.14473342895507812,
"max": 0.07504827529191971,
"mean": -0.009093794040381908,
"std": 0.025712795555591583,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.23969869315624237,
"max": 0.7134895920753479,
"mean": 0.4472740888595581,
"std": 0.05947508662939072,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.2730574309825897,
"max": 0.29789650440216064,
"mean": 8.741370038478635e-06,
"std": 0.035470616072416306,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.11908062547445297,
"max": 0.11852753162384033,
"mean": 0.0007502459920942783,
"std": 0.027627233415842056,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.2813098430633545,
"max": 0.27990853786468506,
"mean": -7.670064951526001e-05,
"std": 0.03509994596242905,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.512526035308838,
"max": 2.5245351791381836,
"mean": 0.026777304708957672,
"std": 0.58714359998703,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.22134634852409363,
"max": 0.2719532251358032,
"mean": 2.8086524253012612e-06,
"std": 0.030731303617358208,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.03337041661143303,
"max": 0.031244885176420212,
"mean": 0.0001174571443698369,
"std": 0.012399335391819477,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.23538419604301453,
"max": 0.2318607121706009,
"mean": 5.6835913710528985e-05,
"std": 0.02569691836833954,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.13590897619724274,
"max": 0.12807728350162506,
"mean": -0.005500740837305784,
"std": 0.039980240166187286,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.3545033931732178,
"max": 1.174311876296997,
"mean": 0.7105965614318848,
"std": 0.10393685102462769,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.6176598072052002,
"max": 0.5552863478660583,
"mean": 0.001160678919404745,
"std": 0.046113625168800354,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.1892954707145691,
"max": 0.024854592978954315,
"mean": -0.034856364130973816,
"std": 0.028640495613217354,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.1318156719207764,
"max": 0.9712402820587158,
"mean": 0.0003593153669498861,
"std": 0.04234175756573677,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.5985706448554993,
"max": 0.0630447119474411,
"mean": -0.004880559165030718,
"std": 0.028633911162614822,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.37550806999206543,
"max": 0.942255973815918,
"mean": 0.592631459236145,
"std": 0.06731508672237396,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.3917059302330017,
"max": 0.3694884479045868,
"mean": 7.032141002127901e-05,
"std": 0.037185318768024445,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11911240965127945,
"max": 0.13666978478431702,
"mean": 0.0009285699925385416,
"std": 0.029224557802081108,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.6197025179862976,
"max": 0.5093265771865845,
"mean": 1.5340243407990783e-05,
"std": 0.036438170820474625,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.196393966674805,
"max": 8.799202919006348,
"mean": -0.10933247208595276,
"std": 1.7004725933074951,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.2767159938812256,
"max": 0.23974454402923584,
"mean": 5.235425851424225e-05,
"std": 0.03261233866214752,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.051826898008584976,
"max": 0.039538491517305374,
"mean": 9.016307012643665e-05,
"std": 0.012965181842446327,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.23091718554496765,
"max": 0.23482012748718262,
"mean": -2.2171980162966065e-05,
"std": 0.029389047995209694,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.20455272495746613,
"max": 0.10541031509637833,
"mean": -0.0040219868533313274,
"std": 0.03264109417796135,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.33964791893959045,
"max": 1.0138026475906372,
"mean": 0.7007413506507874,
"std": 0.0968313068151474,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5653463006019592,
"max": 0.8341253995895386,
"mean": 0.0004152161709498614,
"std": 0.042294181883335114,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.2122603803873062,
"max": 0.03037133999168873,
"mean": -0.03219597041606903,
"std": 0.026528161019086838,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7555134892463684,
"max": 0.7197405099868774,
"mean": -1.6411166143370792e-05,
"std": 0.036835070699453354,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.2637326717376709,
"max": 0.10635162889957428,
"mean": -0.003013473004102707,
"std": 0.028875315561890602,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.2841384708881378,
"max": 0.6960581541061401,
"mean": 0.4994935393333435,
"std": 0.046687543392181396,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.27874135971069336,
"max": 0.23421625792980194,
"mean": -0.0001108625583583489,
"std": 0.03875651955604553,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.15407033264636993,
"max": 0.12659268081188202,
"mean": -0.002232097554951906,
"std": 0.03336996212601662,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.41471850872039795,
"max": 0.6599792838096619,
"mean": -1.8830280168913305e-05,
"std": 0.03909522667527199,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.242863178253174,
"max": 4.727988243103027,
"mean": -0.020436234772205353,
"std": 1.0083643198013306,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.24536164104938507,
"max": 0.20758995413780212,
"mean": 4.39189825556241e-05,
"std": 0.0339621901512146,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.03461671993136406,
"max": 0.04490647837519646,
"mean": -1.8480626749806106e-05,
"std": 0.012636142782866955,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.20118913054466248,
"max": 0.20648600161075592,
"mean": -2.914817741839215e-05,
"std": 0.031020086258649826,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.2001321166753769,
"max": 0.11347545683383942,
"mean": -0.0028973689768463373,
"std": 0.03452814370393753,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.36694979667663574,
"max": 1.0586931705474854,
"mean": 0.6705467104911804,
"std": 0.06646514683961868,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.39871448278427124,
"max": 0.5025006532669067,
"mean": -3.831302092294209e-05,
"std": 0.041130244731903076,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.12894268333911896,
"max": 0.026869317516684532,
"mean": -0.030542686581611633,
"std": 0.021899448707699776,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.4495067298412323,
"max": 0.43352261185646057,
"mean": 7.56321387598291e-05,
"std": 0.034890398383140564,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.26795703172683716,
"max": 0.07305809110403061,
"mean": -0.0010922406800091267,
"std": 0.023138197138905525,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.2873804569244385,
"max": 0.6860803365707397,
"mean": 0.5245892405509949,
"std": 0.047686196863651276,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.22270891070365906,
"max": 0.22395135462284088,
"mean": 1.5596267985529266e-05,
"std": 0.03894849866628647,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.1365431845188141,
"max": 0.1094546914100647,
"mean": 0.0002404236583970487,
"std": 0.02924003079533577,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.37548792362213135,
"max": 0.4377880096435547,
"mean": -9.806113666854799e-06,
"std": 0.039285749197006226,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.8503658771514893,
"max": 5.0051727294921875,
"mean": 0.009742870926856995,
"std": 0.8458123803138733,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.2231559306383133,
"max": 0.22039616107940674,
"mean": -2.540778041293379e-07,
"std": 0.03440915793180466,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.04364994913339615,
"max": 0.03587768226861954,
"mean": -0.00025836972054094076,
"std": 0.012079192325472832,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.21319326758384705,
"max": 0.1889532059431076,
"mean": -1.7074991774279624e-05,
"std": 0.031535569578409195,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.18106123805046082,
"max": 0.12093079835176468,
"mean": -0.0023932361509650946,
"std": 0.04127350077033043,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.422803670167923,
"max": 0.9430609345436096,
"mean": 0.6627297401428223,
"std": 0.05693160742521286,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.37100574374198914,
"max": 0.476217657327652,
"mean": -8.213143883040175e-05,
"std": 0.04088921844959259,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.20879434049129486,
"max": 0.027236830443143845,
"mean": -0.03024592623114586,
"std": 0.021377045661211014,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.3412210941314697,
"max": 0.7347204685211182,
"mean": 8.198502473533154e-05,
"std": 0.034765854477882385,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.2404329776763916,
"max": 0.05046902596950531,
"mean": -0.001188310096040368,
"std": 0.020469345152378082,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.3061925768852234,
"max": 0.654449999332428,
"mean": 0.5251765251159668,
"std": 0.04624079912900925,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.30459216237068176,
"max": 0.21765196323394775,
"mean": 7.016396557446569e-05,
"std": 0.039494771510362625,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.1493639498949051,
"max": 0.13124904036521912,
"mean": 0.00033865522709675133,
"std": 0.03046908602118492,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.2574615776538849,
"max": 0.20232746005058289,
"mean": 3.111670594080351e-05,
"std": 0.03948463872075081,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.339005708694458,
"max": 2.378676176071167,
"mean": -0.026260126382112503,
"std": 0.45006638765335083,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.18878056108951569,
"max": 0.2107384204864502,
"mean": 3.7163907109061256e-05,
"std": 0.034793294966220856,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.031842123717069626,
"max": 0.03563522920012474,
"mean": -0.00019889514078386128,
"std": 0.012288383208215237,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.18877452611923218,
"max": 0.17039048671722412,
"mean": -6.83176185702905e-05,
"std": 0.03216997906565666,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.13967929780483246,
"max": 0.13765227794647217,
"mean": -0.0025106696411967278,
"std": 0.051296915858983994,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.4670410752296448,
"max": 0.9571460485458374,
"mean": 0.668942928314209,
"std": 0.052938032895326614,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.32432013750076294,
"max": 0.30918803811073303,
"mean": -9.502464308752678e-07,
"std": 0.040945153683423996,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.12509244680404663,
"max": 0.025560801848769188,
"mean": -0.03070145845413208,
"std": 0.019835734739899635,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.44014739990234375,
"max": 0.44575265049934387,
"mean": 9.502484317636117e-05,
"std": 0.035118650645017624,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.22483481466770172,
"max": 0.05185456946492195,
"mean": -0.0011811171425506473,
"std": 0.018479909747838974,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.33911338448524475,
"max": 0.7404670715332031,
"mean": 0.5587128400802612,
"std": 0.04148301109671593,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.2731746435165405,
"max": 0.2787404954433441,
"mean": 2.032621341641061e-05,
"std": 0.04105671867728233,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.13706564903259277,
"max": 0.14011380076408386,
"mean": 0.0004902533255517483,
"std": 0.026642272248864174,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.49108198285102844,
"max": 0.35628437995910645,
"mean": 8.894230268197134e-05,
"std": 0.040694475173950195,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.300570249557495,
"max": 1.7478224039077759,
"mean": -0.021113090217113495,
"std": 0.5004414319992065,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.21800002455711365,
"max": 0.19787649810314178,
"mean": -4.053436714457348e-05,
"std": 0.034232478588819504,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.04136132448911667,
"max": 0.03894467279314995,
"mean": -0.0001396951702190563,
"std": 0.012888246215879917,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.17788128554821014,
"max": 0.18331165611743927,
"mean": 4.789709782926366e-05,
"std": 0.031555820256471634,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.1802123337984085,
"max": 0.1839253157377243,
"mean": -0.0022146895062178373,
"std": 0.05485367402434349,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.47431865334510803,
"max": 1.0268715620040894,
"mean": 0.6453023552894592,
"std": 0.05052410438656807,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.27176976203918457,
"max": 0.3096844553947449,
"mean": 0.0001122704561566934,
"std": 0.040681492537260056,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.10546202212572098,
"max": 0.02664944902062416,
"mean": -0.02952582947909832,
"std": 0.01794532686471939,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.3392390310764313,
"max": 0.3297179937362671,
"mean": 5.245600186754018e-05,
"std": 0.034412626177072525,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.181877002120018,
"max": 0.042341288179159164,
"mean": -0.0010600005043670535,
"std": 0.01721755787730217,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.3254714906215668,
"max": 0.6875306367874146,
"mean": 0.5112907886505127,
"std": 0.03710601106286049,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.23404580354690552,
"max": 0.22564062476158142,
"mean": -3.628679769462906e-05,
"std": 0.03917597234249115,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.11565262079238892,
"max": 0.13205118477344513,
"mean": 0.00015428723418153822,
"std": 0.029200663790106773,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.3531610369682312,
"max": 0.28566646575927734,
"mean": 7.01215958542889e-06,
"std": 0.03924458101391792,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.1371684074401855,
"max": 3.5479142665863037,
"mean": -0.011608399450778961,
"std": 0.6831862926483154,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.21112798154354095,
"max": 0.20956169068813324,
"mean": 3.4640430385479704e-05,
"std": 0.034484706819057465,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.0358961820602417,
"max": 0.04827914386987686,
"mean": 0.000792390201240778,
"std": 0.012867480516433716,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.21074581146240234,
"max": 0.19335627555847168,
"mean": -1.3081223642075201e-06,
"std": 0.031695783138275146,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.18677425384521484,
"max": 0.17732204496860504,
"mean": -0.002835639752447605,
"std": 0.05864328145980835,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.4744804799556732,
"max": 1.0434356927871704,
"mean": 0.6514811515808105,
"std": 0.04996025562286377,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.24828216433525085,
"max": 0.3291241526603699,
"mean": 0.00018075907428283244,
"std": 0.04056989774107933,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.12511543929576874,
"max": 0.024807237088680267,
"mean": -0.03050871379673481,
"std": 0.017624877393245697,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.42125385999679565,
"max": 0.4822184443473816,
"mean": -1.4134266166365705e-06,
"std": 0.03539677709341049,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.1517147570848465,
"max": 0.043470486998558044,
"mean": 4.9440553993918e-05,
"std": 0.014891887083649635,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.31546592712402344,
"max": 0.6829473972320557,
"mean": 0.552940845489502,
"std": 0.0407881923019886,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.20658527314662933,
"max": 0.2199694663286209,
"mean": 3.1865805794950575e-05,
"std": 0.03829915076494217,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.1380588412284851,
"max": 0.11287239193916321,
"mean": 2.8096917958464473e-05,
"std": 0.025843404233455658,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.4030921161174774,
"max": 0.37124574184417725,
"mean": 2.583605601103045e-05,
"std": 0.03817982226610184,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.7753050327301025,
"max": 2.8720550537109375,
"mean": 0.001174271572381258,
"std": 0.5172262787818909,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.203634575009346,
"max": 0.19783173501491547,
"mean": 2.9641731089213863e-05,
"std": 0.034296903759241104,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.050782062113285065,
"max": 0.039943333715200424,
"mean": -0.00042034429498016834,
"std": 0.01341927982866764,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.19659629464149475,
"max": 0.20229847729206085,
"mean": -1.2495337614382152e-05,
"std": 0.03180486336350441,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.19323131442070007,
"max": 0.19526611268520355,
"mean": -0.002963971346616745,
"std": 0.06255338340997696,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.34901732206344604,
"max": 1.0851324796676636,
"mean": 0.6672203540802002,
"std": 0.055461570620536804,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22590196132659912,
"max": 0.2515060603618622,
"mean": 0.0003586675738915801,
"std": 0.040759582072496414,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.09132344275712967,
"max": 0.043738022446632385,
"mean": -0.030089886859059334,
"std": 0.017626678571105003,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.35337263345718384,
"max": 0.30428504943847656,
"mean": -4.39239593106322e-05,
"std": 0.03712212294340134,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.16189776360988617,
"max": 0.06336814165115356,
"mean": -8.093340147752315e-05,
"std": 0.019419532269239426,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.3486655354499817,
"max": 0.7230467796325684,
"mean": 0.5424184799194336,
"std": 0.03920904919505119,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.21948550641536713,
"max": 0.22342580556869507,
"mean": -1.1189426913915668e-05,
"std": 0.039230361580848694,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.11856742948293686,
"max": 0.17064979672431946,
"mean": 0.0002859297674149275,
"std": 0.025129808112978935,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.24677123129367828,
"max": 0.30096495151519775,
"mean": -3.686630952870473e-05,
"std": 0.03892983868718147,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.5091044902801514,
"max": 3.718792676925659,
"mean": 0.01584971882402897,
"std": 0.7831407189369202,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.21897639334201813,
"max": 0.23756206035614014,
"mean": -1.3331029549590312e-05,
"std": 0.036302708089351654,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.04721689596772194,
"max": 0.05136079713702202,
"mean": 0.00047709030332043767,
"std": 0.013516037724912167,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.2142534703016281,
"max": 0.21756578981876373,
"mean": 5.647125362884253e-05,
"std": 0.03361497074365616,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.21157103776931763,
"max": 0.23160234093666077,
"mean": -0.005100839305669069,
"std": 0.06188952922821045,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.3621511459350586,
"max": 1.1025023460388184,
"mean": 0.6993520259857178,
"std": 0.05383123829960823,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.23499585688114166,
"max": 0.2451109141111374,
"mean": 0.00046343874419108033,
"std": 0.04126851260662079,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.0981631875038147,
"max": 0.06831478327512741,
"mean": -0.031439878046512604,
"std": 0.01814098283648491,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.3021915853023529,
"max": 0.3518403172492981,
"mean": -8.213460387196392e-05,
"std": 0.04027426242828369,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.1524408757686615,
"max": 0.14984285831451416,
"mean": 0.0002571163640823215,
"std": 0.02304430864751339,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 0.9986459016799927,
"max": 1.0047640800476074,
"mean": 0.9998321533203125,
"std": 0.000813807244412601,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.031263623386621475,
"max": 0.03126571327447891,
"mean": -1.928813617269043e-05,
"std": 0.01804114319384098,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.031225843355059624,
"max": 0.030984507873654366,
"mean": -0.001084179850295186,
"std": 0.01795078068971634,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.031264401972293854,
"max": 0.03126936033368111,
"mean": 3.5438486065686448e-06,
"std": 0.018041551113128662,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.031160537153482437,
"max": 0.031171930953860283,
"mean": 0.00033398409141227603,
"std": 0.01806296594440937,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": -0.0006154034635983407,
"max": 0.00041452725417912006,
"mean": 1.3732544630329357e-06,
"std": 0.00013773542013950646,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 0.9981350898742676,
"max": 1.0061345100402832,
"mean": 1.0003111362457275,
"std": 0.0018558463780209422,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.03275563195347786,
"max": 0.032837994396686554,
"mean": -6.685876542178448e-06,
"std": 0.018042754381895065,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.03275851905345917,
"max": 0.03259003907442093,
"mean": -0.00013117710477672517,
"std": 0.017956379801034927,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": -0.0011762815993279219,
"max": 0.0011538960970938206,
"mean": 3.6382635926202056e-07,
"std": 0.00021428015315905213,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": -0.0005257476586848497,
"max": 0.0003992951533291489,
"mean": 2.2647066089120926e-06,
"std": 0.00012679416977334768,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.3831771910190582,
"max": 0.7203002572059631,
"mean": 0.5807632207870483,
"std": 0.039030127227306366,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.238657608628273,
"max": 0.1965981125831604,
"mean": 2.6105446522706188e-05,
"std": 0.03746547922492027,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.11904074251651764,
"max": 0.16665399074554443,
"mean": 0.0009819172555580735,
"std": 0.027577750384807587,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.2464642971754074,
"max": 0.5006471276283264,
"mean": -5.0186910812044516e-05,
"std": 0.03762289881706238,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.946474552154541,
"max": 3.7734150886535645,
"mean": -0.0035824859514832497,
"std": 0.681806743144989,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.22754359245300293,
"max": 0.25217491388320923,
"mean": -1.1530558367667254e-05,
"std": 0.03743445873260498,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.07175272703170776,
"max": 0.08072981238365173,
"mean": -0.0005130038480274379,
"std": 0.015667041763663292,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.22810040414333344,
"max": 0.2579977512359619,
"mean": -2.8758044209098443e-05,
"std": 0.03542134538292885,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.20080700516700745,
"max": 0.2153109759092331,
"mean": -0.005534037947654724,
"std": 0.0683637484908104,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.4053238332271576,
"max": 1.1908336877822876,
"mean": 0.7380030155181885,
"std": 0.05547412484884262,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.22124385833740234,
"max": 0.24569396674633026,
"mean": 0.0005211688112467527,
"std": 0.041335880756378174,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.10345371812582016,
"max": 0.024234607815742493,
"mean": -0.032675523310899734,
"std": 0.018910475075244904,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.4498954117298126,
"max": 0.42273956537246704,
"mean": -0.00043416087282821536,
"std": 0.04689621180295944,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.2517021596431732,
"max": 0.4706237316131592,
"mean": 0.0032027317211031914,
"std": 0.04455312713980675,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.3169791102409363,
"max": 0.3331950604915619,
"mean": -2.5209596060449257e-05,
"std": 0.021287448704242706,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.3245099186897278,
"max": 0.6862163543701172,
"mean": 0.5710394978523254,
"std": 0.04481911659240723,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.1645577996969223,
"max": 0.17449714243412018,
"mean": -4.883324072579853e-05,
"std": 0.0331808440387249,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.18696466088294983,
"max": 0.14305275678634644,
"mean": 4.307446943130344e-05,
"std": 0.029701771214604378,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.381203293800354,
"max": 0.24647706747055054,
"mean": -9.961708201444708e-06,
"std": 0.032761868089437485,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.6597650051116943,
"max": 3.293627977371216,
"mean": -0.014285150915384293,
"std": 0.9855467677116394,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.23503181338310242,
"max": 0.24772128462791443,
"mean": -1.80145725607872e-05,
"std": 0.04169723764061928,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.07274845242500305,
"max": 0.15466810762882233,
"mean": 0.0006658544880338013,
"std": 0.025178011506795883,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.2665092945098877,
"max": 0.2483654022216797,
"mean": -1.536182753625326e-05,
"std": 0.04013803228735924,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.1897987425327301,
"max": 0.19495300948619843,
"mean": -0.001235135248862207,
"std": 0.06669139117002487,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.32910633087158203,
"max": 1.0014653205871582,
"mean": 0.7192941308021545,
"std": 0.05263138189911842,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.23197908699512482,
"max": 0.24564941227436066,
"mean": 0.0001828196254791692,
"std": 0.04089989513158798,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11449356377124786,
"max": 0.019026821479201317,
"mean": -0.042487140744924545,
"std": 0.018874552100896835,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.39025211334228516,
"max": 0.40785497426986694,
"mean": -2.1506561097339727e-05,
"std": 0.04853347688913345,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.6939337849617004,
"max": 0.4130322337150574,
"mean": 0.0008477974915876985,
"std": 0.06032131612300873,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": -0.0010383415501564741,
"max": 1.0005052089691162,
"mean": 0.00048820566735230386,
"std": 0.02208903431892395,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 0.9979493021965027,
"max": 1.0028773546218872,
"mean": 0.9996361136436462,
"std": 0.0005558156408369541,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.031263865530490875,
"max": 0.03126693516969681,
"mean": -2.1029807612649165e-05,
"std": 0.018032483756542206,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.031225642189383507,
"max": 0.031231923028826714,
"mean": -0.000677043863106519,
"std": 0.017827108502388,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.031264521181583405,
"max": 0.03126373142004013,
"mean": -8.835060725687072e-06,
"std": 0.018031509593129158,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.031228171661496162,
"max": 0.031247133389115334,
"mean": -0.0007299243006855249,
"std": 0.017942015081644058,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": -0.00041853971197269857,
"max": 0.0003325868456158787,
"mean": -3.1447550554730697e-06,
"std": 0.0001163617562269792,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 0.9978473782539368,
"max": 1.0059432983398438,
"mean": 0.9999491572380066,
"std": 0.001859705662354827,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.03244250267744064,
"max": 0.0323757641017437,
"mean": -1.7303907497989712e-06,
"std": 0.018027959391474724,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.032130636274814606,
"max": 0.03116563893854618,
"mean": -0.0003740063984878361,
"std": 0.01804370991885662,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": -0.0012801800621673465,
"max": 0.0011148827616125345,
"mean": -8.956569672591286e-07,
"std": 0.00020970198966097087,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": -0.0003423716698307544,
"max": 0.00029734382405877113,
"mean": -3.7682302718167193e-06,
"std": 0.00010476629540789872,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.2343551367521286,
"max": 0.2724533975124359,
"mean": 6.777756425435655e-06,
"std": 0.018809394910931587,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.32130253314971924,
"max": 0.6949947476387024,
"mean": 0.5816991329193115,
"std": 0.04608374834060669,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.18193963170051575,
"max": 0.19776132702827454,
"mean": -1.1586925211304333e-05,
"std": 0.033183593302965164,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.16079005599021912,
"max": 0.12958164513111115,
"mean": -0.0010761492885649204,
"std": 0.03415785729885101,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.33248019218444824,
"max": 0.31138068437576294,
"mean": -1.0150852176593617e-05,
"std": 0.0322343148291111,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.811703681945801,
"max": 8.77199935913086,
"mean": 0.09351971745491028,
"std": 1.6208088397979736,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.23395448923110962,
"max": 0.24196705222129822,
"mean": 4.150588938500732e-05,
"std": 0.04085612669587135,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.07599986344575882,
"max": 0.06580105423927307,
"mean": 0.0004830547550227493,
"std": 0.019416898488998413,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.24603235721588135,
"max": 0.23429378867149353,
"mean": -3.1053496059030294e-06,
"std": 0.039430778473615646,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.16335651278495789,
"max": 0.16123652458190918,
"mean": 0.001627025194466114,
"std": 0.0652812197804451,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.5569519996643066,
"max": 0.9448988437652588,
"mean": 0.712960422039032,
"std": 0.040366582572460175,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.2282220721244812,
"max": 0.255278617143631,
"mean": -4.5689772377954796e-05,
"std": 0.04057461395859718,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.1351189911365509,
"max": 0.02213732711970806,
"mean": -0.04135933890938759,
"std": 0.018408460542559624,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.4218907952308655,
"max": 0.39247259497642517,
"mean": -4.45842306362465e-06,
"std": 0.04778381064534187,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.6081869602203369,
"max": 0.6523037552833557,
"mean": 0.0015862288419157267,
"std": 0.0568697564303875,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.25164270401000977,
"max": 0.32068535685539246,
"mean": -6.094380296417512e-06,
"std": 0.019612763077020645,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.3596651554107666,
"max": 0.6836386322975159,
"mean": 0.5707623958587646,
"std": 0.04307318106293678,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.2204994410276413,
"max": 0.17691564559936523,
"mean": -3.469674993539229e-05,
"std": 0.034298643469810486,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.16342805325984955,
"max": 0.23329652845859528,
"mean": 0.0003627383557613939,
"std": 0.03284167870879173,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.26406827569007874,
"max": 0.24012491106987,
"mean": -5.2815768867731094e-05,
"std": 0.033897485584020615,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.859966278076172,
"max": 5.0964674949646,
"mean": 0.04393793269991875,
"std": 1.230094075202942,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.2463618665933609,
"max": 0.250487744808197,
"mean": 7.235530210891739e-05,
"std": 0.04398680850863457,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.06267692148685455,
"max": 0.054532695561647415,
"mean": 0.000642440456431359,
"std": 0.017191536724567413,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.2865088880062103,
"max": 0.272175133228302,
"mean": -5.016334762331098e-05,
"std": 0.042984914034605026,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.1612030565738678,
"max": 0.1705736219882965,
"mean": -0.0028862706385552883,
"std": 0.05929599329829216,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.5197344422340393,
"max": 0.9341347813606262,
"mean": 0.7135534286499023,
"std": 0.03866534307599068,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.23811031877994537,
"max": 0.24873413145542145,
"mean": 0.0004648095346055925,
"std": 0.04045360907912254,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.14526499807834625,
"max": 0.041103385388851166,
"mean": -0.03970393165946007,
"std": 0.02056412398815155,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.5330354571342468,
"max": 0.5828887820243835,
"mean": 5.7578072301112115e-06,
"std": 0.04885943979024887,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5195844769477844,
"max": 0.4939325749874115,
"mean": 0.002366485306993127,
"std": 0.05347662419080734,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.273802787065506,
"max": 0.3155968487262726,
"mean": 2.01077523342974e-06,
"std": 0.02004941552877426,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.36614885926246643,
"max": 0.7128685116767883,
"mean": 0.5932222604751587,
"std": 0.04609934985637665,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.21103930473327637,
"max": 0.19931277632713318,
"mean": 3.062984978896566e-05,
"std": 0.03486598655581474,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.18738499283790588,
"max": 0.20401518046855927,
"mean": 0.0009546762448735535,
"std": 0.031527843326330185,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.2900747060775757,
"max": 0.3402419686317444,
"mean": -4.711254223366268e-05,
"std": 0.03458685800433159,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.881408214569092,
"max": 3.3909339904785156,
"mean": 0.014485932886600494,
"std": 0.8588526248931885,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.22469207644462585,
"max": 0.2501186430454254,
"mean": -3.7895424611633644e-06,
"std": 0.04222952574491501,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.05539275333285332,
"max": 0.046729691326618195,
"mean": -1.6585952835157514e-05,
"std": 0.01585092395544052,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.29304519295692444,
"max": 0.2904603183269501,
"mean": -7.356060450547375e-06,
"std": 0.04194435849785805,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.1251518875360489,
"max": 0.25925886631011963,
"mean": -0.0032416037283837795,
"std": 0.05317998677492142,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.4565337300300598,
"max": 0.8454437851905823,
"mean": 0.7055786848068237,
"std": 0.035420604050159454,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.5118804574012756,
"max": 0.34804508090019226,
"mean": 0.00034280645195394754,
"std": 0.040198732167482376,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.18613915145397186,
"max": 0.03958306089043617,
"mean": -0.03939869999885559,
"std": 0.021371137350797653,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.544677197933197,
"max": 0.5565076470375061,
"mean": -7.158219523262233e-05,
"std": 0.050734151154756546,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.5122924447059631,
"max": 0.6649084091186523,
"mean": 0.002443553414195776,
"std": 0.04954148083925247,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.3326307237148285,
"max": 0.2655903100967407,
"mean": 3.417561856622342e-06,
"std": 0.01938662678003311,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.32189854979515076,
"max": 0.7676428556442261,
"mean": 0.6510834097862244,
"std": 0.045412834733724594,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.24963903427124023,
"max": 0.21975325047969818,
"mean": -2.1360538084991276e-06,
"std": 0.03650053218007088,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.3272336423397064,
"max": 0.2872598171234131,
"mean": -0.000690902175847441,
"std": 0.038575589656829834,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.3104217052459717,
"max": 0.3704308867454529,
"mean": 6.501967436634004e-05,
"std": 0.03624104708433151,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.723941326141357,
"max": 5.815830707550049,
"mean": 0.03795095533132553,
"std": 1.4143388271331787,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.22184839844703674,
"max": 0.20582044124603271,
"mean": -7.514897151850164e-05,
"std": 0.04248502478003502,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.07779642939567566,
"max": 0.05152571201324463,
"mean": -0.0009286667918786407,
"std": 0.016416585072875023,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.33085039258003235,
"max": 0.3292792737483978,
"mean": -4.624932898877887e-06,
"std": 0.04279141500592232,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.2850324511528015,
"max": 0.11214210838079453,
"mean": -0.0012058319989591837,
"std": 0.04702144116163254,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.48610714077949524,
"max": 0.8880516886711121,
"mean": 0.7374852299690247,
"std": 0.038454823195934296,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.3623265027999878,
"max": 0.2744399905204773,
"mean": 5.1268329116282985e-05,
"std": 0.04064424708485603,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.24789389967918396,
"max": 0.046399183571338654,
"mean": -0.0392770953476429,
"std": 0.023303059861063957,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.6267192959785461,
"max": 0.5975406765937805,
"mean": -6.142957136034966e-05,
"std": 0.05311630666255951,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.7102671265602112,
"max": 0.2661624252796173,
"mean": 0.0009175186860375106,
"std": 0.05124976858496666,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.3433755040168762,
"max": 0.30368152260780334,
"mean": 1.5963701116561424e-07,
"std": 0.01913503371179104,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.34989097714424133,
"max": 0.7839252948760986,
"mean": 0.6388714909553528,
"std": 0.04933994635939598,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.2052908092737198,
"max": 0.20688343048095703,
"mean": -5.992479418637231e-05,
"std": 0.03769543766975403,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.2588193416595459,
"max": 0.26830655336380005,
"mean": -0.00039892495260573924,
"std": 0.044624269008636475,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.3542262613773346,
"max": 0.3225662410259247,
"mean": -6.961288363527274e-06,
"std": 0.037203844636678696,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.267129421234131,
"max": 4.20892858505249,
"mean": -0.02641383744776249,
"std": 1.0074299573898315,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.23873740434646606,
"max": 0.24359266459941864,
"mean": -2.525941454223357e-05,
"std": 0.04320967569947243,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.06238892674446106,
"max": 0.056785948574543,
"mean": 0.0003448878414928913,
"std": 0.014156854711472988,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.4372255206108093,
"max": 0.37362250685691833,
"mean": 1.442125540052075e-05,
"std": 0.04412117227911949,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.09657814353704453,
"max": 0.1761663407087326,
"mean": -0.0006602209759876132,
"std": 0.03516199812293053,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.4218268096446991,
"max": 1.070821762084961,
"mean": 0.7484229803085327,
"std": 0.042183347046375275,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.26658228039741516,
"max": 0.2970208525657654,
"mean": -7.946729601826519e-05,
"std": 0.04080420732498169,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.18536308407783508,
"max": 0.04367092251777649,
"mean": -0.0368281751871109,
"std": 0.02562659978866577,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.4574359655380249,
"max": 0.4870511591434479,
"mean": 4.341827298048884e-05,
"std": 0.05420948192477226,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.2867445945739746,
"max": 0.5520338416099548,
"mean": -0.0008801904041320086,
"std": 0.04785289987921715,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.2927917540073395,
"max": 0.32283690571784973,
"mean": 6.15146973359515e-06,
"std": 0.019968591630458832,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.2908935844898224,
"max": 0.7611098885536194,
"mean": 0.6508486270904541,
"std": 0.05218230187892914,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.2437344491481781,
"max": 0.2615884840488434,
"mean": -6.006965122651309e-06,
"std": 0.03961160406470299,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.267729252576828,
"max": 0.20025481283664703,
"mean": -0.0008811865700408816,
"std": 0.05178782343864441,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.27242225408554077,
"max": 0.25395235419273376,
"mean": 4.551842721411958e-06,
"std": 0.03870858997106552,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -12.978915214538574,
"max": 15.964410781860352,
"mean": 0.033282238990068436,
"std": 1.9907665252685547,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.20730799436569214,
"max": 0.22610057890415192,
"mean": -7.21659671398811e-05,
"std": 0.0405535064637661,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.06938357651233673,
"max": 0.06327643245458603,
"mean": 0.00015629694098606706,
"std": 0.014746708795428276,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.46517401933670044,
"max": 0.320604145526886,
"mean": 1.968832475540694e-05,
"std": 0.040588606148958206,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.06422771513462067,
"max": 0.11537671089172363,
"mean": 0.0011921785771846771,
"std": 0.024717185646295547,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.3747236132621765,
"max": 0.9333999156951904,
"mean": 0.7509297132492065,
"std": 0.04027929529547691,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.2798289656639099,
"max": 0.2732216715812683,
"mean": -0.00016840256284922361,
"std": 0.0409947969019413,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.19888785481452942,
"max": 0.05115103721618652,
"mean": -0.0320354662835598,
"std": 0.025122012943029404,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.6584249138832092,
"max": 0.5358718037605286,
"mean": -4.888750845566392e-05,
"std": 0.05284606292843819,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.19323143362998962,
"max": 0.5829473733901978,
"mean": -0.0005128738121129572,
"std": 0.041099581867456436,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.41776803135871887,
"max": 0.3719577491283417,
"mean": 6.155986739031505e-06,
"std": 0.02162076160311699,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.2142574042081833,
"max": 0.7495372891426086,
"mean": 0.6495493054389954,
"std": 0.05440565198659897,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.2096448391675949,
"max": 0.1958194077014923,
"mean": 4.026427632197738e-05,
"std": 0.03946169093251228,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.3297625780105591,
"max": 0.25971850752830505,
"mean": -0.003232162445783615,
"std": 0.05629448592662811,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.2059866487979889,
"max": 0.25485166907310486,
"mean": 5.424032860901207e-05,
"std": 0.0385642871260643,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.252347469329834,
"max": 6.942240238189697,
"mean": 0.0483565516769886,
"std": 1.3863071203231812,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.20988626778125763,
"max": 0.23036901652812958,
"mean": -5.1103716032230295e-06,
"std": 0.04131251201033592,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.04387219622731209,
"max": 0.036041487008333206,
"mean": 6.907794158905745e-07,
"std": 0.012801294215023518,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.3976612091064453,
"max": 0.3448401689529419,
"mean": -5.557302574743517e-05,
"std": 0.04238886013627052,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.055147796869277954,
"max": 0.06285040080547333,
"mean": 0.00036463249125517905,
"std": 0.018676765263080597,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.3504812717437744,
"max": 1.0465654134750366,
"mean": 0.7894250154495239,
"std": 0.048819400370121,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.3335842788219452,
"max": 0.3860694169998169,
"mean": -0.00016952167788986117,
"std": 0.041479866951704025,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.15750724077224731,
"max": 0.05909515544772148,
"mean": -0.03184274956583977,
"std": 0.025149760767817497,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6963300704956055,
"max": 0.4692156910896301,
"mean": -8.906715083867311e-05,
"std": 0.05179242789745331,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.2484702616930008,
"max": 0.32900601625442505,
"mean": -0.0002533062652219087,
"std": 0.041455697268247604,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.2871420085430145,
"max": 0.35027819871902466,
"mean": -2.14410374610452e-06,
"std": 0.024236002936959267,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.1965855360031128,
"max": 0.7816711664199829,
"mean": 0.6702626943588257,
"std": 0.05871051922440529,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.22907193005084991,
"max": 0.23129022121429443,
"mean": -1.9948049157392234e-05,
"std": 0.040437448769807816,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.22012899816036224,
"max": 0.24119356274604797,
"mean": 0.0007787380600348115,
"std": 0.0558554045855999,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.21590574085712433,
"max": 0.22671166062355042,
"mean": -7.169770105974749e-05,
"std": 0.03937356546521187,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.917876243591309,
"max": 9.080994606018066,
"mean": -0.001221940852701664,
"std": 1.850203514099121,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.2697039842605591,
"max": 0.2592160999774933,
"mean": 4.3639320210786536e-05,
"std": 0.03840581700205803,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.057751722633838654,
"max": 0.05785238742828369,
"mean": 0.0003506582579575479,
"std": 0.014723116531968117,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.26493385434150696,
"max": 0.28856679797172546,
"mean": -6.166309321997687e-05,
"std": 0.0390719398856163,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.04392173886299133,
"max": 0.037354789674282074,
"mean": -9.023403254104778e-05,
"std": 0.013362305238842964,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.339423805475235,
"max": 1.0940691232681274,
"mean": 0.8637771010398865,
"std": 0.06392761319875717,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.4232974052429199,
"max": 0.418984055519104,
"mean": 0.0003126158844679594,
"std": 0.043500375002622604,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.2149772197008133,
"max": 0.1709900051355362,
"mean": -0.02949333004653454,
"std": 0.03195162117481232,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.5998314619064331,
"max": 0.5601617097854614,
"mean": -0.00015080120647326112,
"std": 0.053445130586624146,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17873013019561768,
"max": 0.3772476017475128,
"mean": 0.001360590336844325,
"std": 0.03732540085911751,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.39441367983818054,
"max": 0.36907026171684265,
"mean": 3.6978712159907445e-05,
"std": 0.02861737459897995,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.2903454899787903,
"max": 0.8293581604957581,
"mean": 0.7055460214614868,
"std": 0.0678996667265892,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.9263197779655457,
"max": 1.0265021324157715,
"mean": -2.6120340407942422e-05,
"std": 0.04762475937604904,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.8798882961273193,
"max": 0.8164214491844177,
"mean": -0.0003084776981268078,
"std": 0.09563522785902023,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.2697499990463257,
"max": 0.24099533259868622,
"mean": -2.2782449377700686e-05,
"std": 0.03895165026187897,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.775798797607422,
"max": 22.882915496826172,
"mean": -0.09193148463964462,
"std": 4.075654983520508,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.22787398099899292,
"max": 0.24508967995643616,
"mean": -2.5707324311952107e-05,
"std": 0.038637157529592514,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.06037602946162224,
"max": 0.04592515528202057,
"mean": -0.00014296159497462213,
"std": 0.01469582598656416,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.33830153942108154,
"max": 0.3749238848686218,
"mean": 7.406164513668045e-06,
"std": 0.04081294313073158,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.04650312289595604,
"max": 0.19583187997341156,
"mean": 0.00027365636196918786,
"std": 0.01356838084757328,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.374420702457428,
"max": 1.1316319704055786,
"mean": 0.8900625109672546,
"std": 0.0640411525964737,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.4478547275066376,
"max": 0.5426859259605408,
"mean": 2.472557571309153e-05,
"std": 0.045565616339445114,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.22437457740306854,
"max": 0.08822718262672424,
"mean": -0.03203187137842178,
"std": 0.037792954593896866,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7258070111274719,
"max": 0.689643919467926,
"mean": 3.430668584769592e-05,
"std": 0.05177781358361244,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.17477792501449585,
"max": 0.2187574803829193,
"mean": 4.145095590502024e-05,
"std": 0.03179146349430084,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.34034836292266846,
"max": 0.37395596504211426,
"mean": 4.299964348319918e-05,
"std": 0.034139711409807205,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.3176548182964325,
"max": 1.2885946035385132,
"mean": 0.6015164256095886,
"std": 0.08361472934484482,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.2833174467086792,
"max": 0.2604674696922302,
"mean": -2.836968405972584e-06,
"std": 0.0359807163476944,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.23581622540950775,
"max": 0.20569506287574768,
"mean": 0.00023786764359101653,
"std": 0.05603973567485809,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.4355442523956299,
"max": 0.3252858817577362,
"mean": 2.4317849238286726e-05,
"std": 0.03413137421011925,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.551609039306641,
"max": 7.321235179901123,
"mean": -0.00739276222884655,
"std": 0.7000025510787964,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.3440183103084564,
"max": 0.36360201239585876,
"mean": 0.00010336286504752934,
"std": 0.04782794788479805,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.07385823875665665,
"max": 0.060460202395915985,
"mean": 0.0009339989046566188,
"std": 0.014948051422834396,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.2559783458709717,
"max": 0.2868276536464691,
"mean": 4.447174433153123e-06,
"std": 0.041554734110832214,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.05538920685648918,
"max": 0.06289947777986526,
"mean": 0.0001379675231873989,
"std": 0.007169328164309263,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.4936121106147766,
"max": 1.2250889539718628,
"mean": 1.0134532451629639,
"std": 0.11746872216463089,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.0936273336410522,
"max": 1.0470186471939087,
"mean": -4.9267873691860586e-05,
"std": 0.05240849778056145,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.22356118261814117,
"max": 0.17290450632572174,
"mean": -0.0272555910050869,
"std": 0.03637368604540825,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8851283192634583,
"max": 0.9232462048530579,
"mean": -0.00014597055269405246,
"std": 0.05328158289194107,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.17127959430217743,
"max": 0.38031327724456787,
"mean": 0.0033715758472681046,
"std": 0.03991725295782089,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.7775956392288208,
"max": 0.7237375378608704,
"mean": 1.8900283976108767e-05,
"std": 0.04616079851984978,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.3385881781578064,
"max": 1.4302620887756348,
"mean": 0.9483721852302551,
"std": 0.20680920779705048,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.745700478553772,
"max": 1.704361081123352,
"mean": 0.0002272462734254077,
"std": 0.15868504345417023,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.2009143829345703,
"max": 1.1011698246002197,
"mean": -0.009547756053507328,
"std": 0.20407216250896454,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.4211972653865814,
"max": 0.42695388197898865,
"mean": 6.460870645241812e-05,
"std": 0.04801572859287262,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.76715850830078,
"max": 19.56202507019043,
"mean": -0.24856510758399963,
"std": 4.78177547454834,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.32400673627853394,
"max": 0.4385600686073303,
"mean": -1.1902460755663924e-05,
"std": 0.046161260455846786,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.0341118723154068,
"max": 0.03712359443306923,
"mean": 0.0006423432496376336,
"std": 0.012920627370476723,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.7036018371582031,
"max": 0.6655198335647583,
"mean": 4.3310083128744736e-05,
"std": 0.057881489396095276,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.07232781499624252,
"max": 0.06761610507965088,
"mean": -0.00013295613462105393,
"std": 0.012923309579491615,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.38030096888542175,
"max": 1.391922116279602,
"mean": 1.066575527191162,
"std": 0.21970626711845398,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.6164926886558533,
"max": 0.717415988445282,
"mean": 0.00011193109094165266,
"std": 0.05802033841609955,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.21991026401519775,
"max": 0.22539444267749786,
"mean": 0.006232057698071003,
"std": 0.049761686474084854,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.6297550797462463,
"max": 0.8893491625785828,
"mean": 1.1787104085669853e-05,
"std": 0.023527706041932106,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.5073941349983215,
"max": 0.47446364164352417,
"mean": -0.00302139762789011,
"std": 0.06935760378837585,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.5377801060676575,
"max": 1.1812876462936401,
"mean": 0.7827885746955872,
"std": 0.09896031767129898,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.2672213613986969,
"max": 0.21292650699615479,
"mean": -0.00022339042334351689,
"std": 0.05399598926305771,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23833607137203217,
"max": 0.014835306443274021,
"mean": -0.04396972805261612,
"std": 0.03436173498630524,
"sparsity": 0.0,
"shape": [
100
]
}
}
}