zombie501 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
e1ecbe5 verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.4304574429988861,
"max": 0.2989666759967804,
"mean": -0.0025583612732589245,
"std": 0.042551927268505096,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.06317814439535141,
"max": 0.10763632506132126,
"mean": 0.0005897035007365048,
"std": 0.03411067649722099,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.4125778377056122,
"max": 0.8363006114959717,
"mean": -0.00021047875634394586,
"std": 0.024107400327920914,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.1154782623052597,
"max": 0.32146546244621277,
"mean": -0.0009399052942171693,
"std": 0.019577190279960632,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.7917730808258057,
"max": 2.8704917430877686,
"mean": -0.0003648003621492535,
"std": 0.6153737306594849,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.27894294261932373,
"max": 0.38190174102783203,
"mean": 0.00042033716454170644,
"std": 0.042750339955091476,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.2222987860441208,
"max": 0.20967179536819458,
"mean": -0.00449405936524272,
"std": 0.04091016948223114,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.4279242753982544,
"max": 0.47530120611190796,
"mean": 2.540943796702777e-06,
"std": 0.024509120732545853,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.32545235753059387,
"max": 0.15698140859603882,
"mean": -0.0467013455927372,
"std": 0.051578979939222336,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.41039708256721497,
"max": 0.3545180857181549,
"mean": -0.00012633543519768864,
"std": 0.023601215332746506,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.2297646850347519,
"max": 0.26262199878692627,
"mean": -0.029148615896701813,
"std": 0.049347542226314545,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.2546185553073883,
"max": 0.8200821876525879,
"mean": 0.5254418849945068,
"std": 0.08080805093050003,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.29693102836608887,
"max": 0.26530489325523376,
"mean": -0.00042408728040754795,
"std": 0.032104212790727615,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.09274528920650482,
"max": 0.12482056021690369,
"mean": 0.0006486810743808746,
"std": 0.025742707774043083,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.29047587513923645,
"max": 0.28141430020332336,
"mean": -7.6991505920887e-05,
"std": 0.03093625046312809,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.899471282958984,
"max": 5.8142476081848145,
"mean": -0.009332108311355114,
"std": 1.2954597473144531,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.42482444643974304,
"max": 0.34377753734588623,
"mean": 9.762628906173632e-05,
"std": 0.02995302341878414,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.028968220576643944,
"max": 0.027649197727441788,
"mean": -0.0003115592699032277,
"std": 0.012572345323860645,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.45394477248191833,
"max": 0.44869503378868103,
"mean": 2.2737156541552395e-05,
"std": 0.023855075240135193,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.08868509531021118,
"max": 0.0911499559879303,
"mean": 0.002273137215524912,
"std": 0.019512129947543144,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.2666190564632416,
"max": 1.0562766790390015,
"mean": 0.531130313873291,
"std": 0.1044141948223114,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.5744591355323792,
"max": 0.6083897948265076,
"mean": -0.00043104952783323824,
"std": 0.03859502077102661,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.1818137913942337,
"max": 0.045760128647089005,
"mean": -0.029441693797707558,
"std": 0.042590487748384476,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.166682481765747,
"max": 1.634623646736145,
"mean": 0.0003185438981745392,
"std": 0.02769385650753975,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.16253960132598877,
"max": 0.2057240754365921,
"mean": -0.021116681396961212,
"std": 0.027940358966588974,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.2244873046875,
"max": 0.8436590433120728,
"mean": 0.48752978444099426,
"std": 0.07519952952861786,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.25530415773391724,
"max": 0.3058406710624695,
"mean": -9.383336873725057e-06,
"std": 0.03347048535943031,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.09549209475517273,
"max": 0.11042480170726776,
"mean": 5.650718230754137e-05,
"std": 0.02698545530438423,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.2974618077278137,
"max": 0.295981764793396,
"mean": 5.020356547902338e-05,
"std": 0.03253836929798126,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.164300918579102,
"max": 5.084524154663086,
"mean": -0.0145945493131876,
"std": 1.1573816537857056,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.3448942005634308,
"max": 0.3434945046901703,
"mean": 7.886815001256764e-05,
"std": 0.030058231204748154,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.036158282309770584,
"max": 0.03324951231479645,
"mean": -0.00014386117982212454,
"std": 0.013023010455071926,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.31528618931770325,
"max": 0.3752082884311676,
"mean": -2.1654177544405684e-05,
"std": 0.024055516347289085,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.10527443885803223,
"max": 0.12188493460416794,
"mean": -0.001954286126419902,
"std": 0.0288428645581007,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.31180328130722046,
"max": 1.120958685874939,
"mean": 0.6662410497665405,
"std": 0.09774944931268692,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.872490644454956,
"max": 0.627565324306488,
"mean": 0.0016757093835622072,
"std": 0.047438349574804306,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.27100008726119995,
"max": 0.03407798707485199,
"mean": -0.04660271108150482,
"std": 0.04059542715549469,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.9202945232391357,
"max": 0.9643993973731995,
"mean": 0.0010207913583144546,
"std": 0.04070187732577324,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.14455102384090424,
"max": 0.07482050359249115,
"mean": -0.009084243327379227,
"std": 0.025694938376545906,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.23976297676563263,
"max": 0.7124081254005432,
"mean": 0.4472041726112366,
"std": 0.05932378023862839,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.27300503849983215,
"max": 0.297477126121521,
"mean": 8.662666004966013e-06,
"std": 0.035474397242069244,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.11903306841850281,
"max": 0.11846816539764404,
"mean": 0.0007502126973122358,
"std": 0.02760804258286953,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.28101953864097595,
"max": 0.27942612767219543,
"mean": -7.648450991837308e-05,
"std": 0.03510245680809021,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.5096001625061035,
"max": 2.5215961933135986,
"mean": 0.026745397597551346,
"std": 0.586780309677124,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.22110240161418915,
"max": 0.27161508798599243,
"mean": 2.438401679683011e-06,
"std": 0.030731581151485443,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.033151235431432724,
"max": 0.031146494671702385,
"mean": 0.00011706411896739155,
"std": 0.012394252233207226,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.23539645969867706,
"max": 0.23185278475284576,
"mean": 5.7256078434875235e-05,
"std": 0.025697633624076843,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.13603141903877258,
"max": 0.1280086189508438,
"mean": -0.005497735925018787,
"std": 0.03996264934539795,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.3547299802303314,
"max": 1.1723523139953613,
"mean": 0.7105399370193481,
"std": 0.10377444326877594,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.6173876523971558,
"max": 0.5556272268295288,
"mean": 0.001160334562882781,
"std": 0.046114034950733185,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.18945953249931335,
"max": 0.024937259033322334,
"mean": -0.034846723079681396,
"std": 0.028622858226299286,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.1309547424316406,
"max": 0.97038733959198,
"mean": 0.00035909086000174284,
"std": 0.04234256222844124,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.5978560447692871,
"max": 0.06273925304412842,
"mean": -0.0048814816400408745,
"std": 0.028621360659599304,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.3753381073474884,
"max": 0.9404851794242859,
"mean": 0.592466413974762,
"std": 0.06694933772087097,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.3917763829231262,
"max": 0.36936038732528687,
"mean": 7.001425547059625e-05,
"std": 0.0371866449713707,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11900075525045395,
"max": 0.13653883337974548,
"mean": 0.0009160788613371551,
"std": 0.029187612235546112,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.6190802454948425,
"max": 0.508792519569397,
"mean": 1.5223037735268008e-05,
"std": 0.036439377814531326,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.18681526184082,
"max": 8.788924217224121,
"mean": -0.10927566885948181,
"std": 1.6988582611083984,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.27652865648269653,
"max": 0.2397209107875824,
"mean": 5.228666486800648e-05,
"std": 0.03261314332485199,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.0514988899230957,
"max": 0.03946297615766525,
"mean": 9.359161776956171e-05,
"std": 0.012969369068741798,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.23075971007347107,
"max": 0.23487111926078796,
"mean": -2.203527037636377e-05,
"std": 0.029389776289463043,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.20423616468906403,
"max": 0.1052512601017952,
"mean": -0.004020487889647484,
"std": 0.03263992816209793,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.33965712785720825,
"max": 1.012444019317627,
"mean": 0.7007054090499878,
"std": 0.09675901383161545,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5647616982460022,
"max": 0.8335906267166138,
"mean": 0.0004150677123107016,
"std": 0.04229460284113884,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.21212875843048096,
"max": 0.029963094741106033,
"mean": -0.03217349201440811,
"std": 0.026498712599277496,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7548851370811462,
"max": 0.719126284122467,
"mean": -1.581827746122144e-05,
"std": 0.036835212260484695,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.2634251117706299,
"max": 0.1063019409775734,
"mean": -0.0030143139883875847,
"std": 0.028873277828097343,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.28394702076911926,
"max": 0.6950414180755615,
"mean": 0.4993884563446045,
"std": 0.04653454199433327,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.2782432436943054,
"max": 0.2338251918554306,
"mean": -0.00011091169290011749,
"std": 0.03875752165913582,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.15358327329158783,
"max": 0.12643983960151672,
"mean": -0.0022276192903518677,
"std": 0.033326249569654465,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.41438740491867065,
"max": 0.6594708561897278,
"mean": -1.851528577390127e-05,
"std": 0.039096731692552567,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.237917423248291,
"max": 4.722480773925781,
"mean": -0.020456865429878235,
"std": 1.0076923370361328,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.245052769780159,
"max": 0.20759740471839905,
"mean": 4.428692045621574e-05,
"std": 0.0339626781642437,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.034463901072740555,
"max": 0.04485860466957092,
"mean": -2.209081139881164e-05,
"std": 0.012639513239264488,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.2011018991470337,
"max": 0.20644338428974152,
"mean": -2.9357790481299162e-05,
"std": 0.03102092258632183,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.19982746243476868,
"max": 0.11318917572498322,
"mean": -0.0028952043503522873,
"std": 0.03453591465950012,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.36675214767456055,
"max": 1.0576648712158203,
"mean": 0.6704948544502258,
"std": 0.06640778481960297,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.39844217896461487,
"max": 0.5021068453788757,
"mean": -3.8750327803427354e-05,
"std": 0.04113020375370979,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.12863779067993164,
"max": 0.026958497241139412,
"mean": -0.030533233657479286,
"std": 0.02188229374587536,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.449487566947937,
"max": 0.43325698375701904,
"mean": 7.53812346374616e-05,
"std": 0.03489059582352638,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.2675015926361084,
"max": 0.07307843118906021,
"mean": -0.0010904058581218123,
"std": 0.02313595451414585,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.28754422068595886,
"max": 0.6852768659591675,
"mean": 0.5245310068130493,
"std": 0.04753505066037178,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.22266238927841187,
"max": 0.22331833839416504,
"mean": 1.5918290955596603e-05,
"std": 0.038949232548475266,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.13635052740573883,
"max": 0.10933808237314224,
"mean": 0.00024784280685707927,
"std": 0.029207777231931686,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.37493839859962463,
"max": 0.43759685754776,
"mean": -9.403542208019644e-06,
"std": 0.03928738459944725,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.8458573818206787,
"max": 4.999326705932617,
"mean": 0.009741819463670254,
"std": 0.8452204465866089,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.22270528972148895,
"max": 0.22029587626457214,
"mean": -3.1911031328490935e-07,
"std": 0.034410301595926285,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.043785978108644485,
"max": 0.03592836111783981,
"mean": -0.0002596271806396544,
"std": 0.012078739702701569,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.21270592510700226,
"max": 0.18842868506908417,
"mean": -1.7000973457470536e-05,
"std": 0.03153671696782112,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.1809375286102295,
"max": 0.12074985355138779,
"mean": -0.002395304851233959,
"std": 0.04127994924783707,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.422917902469635,
"max": 0.9417884349822998,
"mean": 0.6626536250114441,
"std": 0.05681688338518143,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.3708776533603668,
"max": 0.4765470623970032,
"mean": -8.20929926703684e-05,
"std": 0.04088940471410751,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.20849654078483582,
"max": 0.0273736622184515,
"mean": -0.03023475781083107,
"std": 0.021363815292716026,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.3406715989112854,
"max": 0.7341561913490295,
"mean": 8.243846968980506e-05,
"std": 0.03476623818278313,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.24016188085079193,
"max": 0.05046152323484421,
"mean": -0.0011865879641845822,
"std": 0.020459504798054695,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.30588385462760925,
"max": 0.6534701585769653,
"mean": 0.5251248478889465,
"std": 0.04612228646874428,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.30431559681892395,
"max": 0.21719232201576233,
"mean": 6.998516619205475e-05,
"std": 0.039497170597314835,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.14912384748458862,
"max": 0.13098323345184326,
"mean": 0.0003266759740654379,
"std": 0.03045588731765747,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.25694772601127625,
"max": 0.201896533370018,
"mean": 3.129036849713884e-05,
"std": 0.0394882932305336,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.336271047592163,
"max": 2.375894784927368,
"mean": -0.026241114363074303,
"std": 0.44977155327796936,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.18857863545417786,
"max": 0.21028850972652435,
"mean": 3.711117460625246e-05,
"std": 0.034793779253959656,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.03168531507253647,
"max": 0.03566686809062958,
"mean": -0.00019767877529375255,
"std": 0.012288626283407211,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.18829987943172455,
"max": 0.17024517059326172,
"mean": -6.836466491222382e-05,
"std": 0.03217046335339546,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.13942238688468933,
"max": 0.1372329592704773,
"mean": -0.002514950931072235,
"std": 0.05129847675561905,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.4670739769935608,
"max": 0.955595850944519,
"mean": 0.6688634157180786,
"std": 0.05277201533317566,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.3244037926197052,
"max": 0.309257835149765,
"mean": -1.045628778229002e-06,
"std": 0.04094540327787399,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.1248614490032196,
"max": 0.025666970759630203,
"mean": -0.030689720064401627,
"std": 0.019823001697659492,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.43948638439178467,
"max": 0.44534069299697876,
"mean": 9.591381240170449e-05,
"std": 0.035119153559207916,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.2246266007423401,
"max": 0.051820773631334305,
"mean": -0.0011818428756669164,
"std": 0.018466750159859657,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.33914706110954285,
"max": 0.7398536205291748,
"mean": 0.5587007999420166,
"std": 0.04139573872089386,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.2729904353618622,
"max": 0.27884039282798767,
"mean": 2.0351768398541026e-05,
"std": 0.04105766862630844,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.13680818676948547,
"max": 0.13977055251598358,
"mean": 0.0004918644553981721,
"std": 0.02663181535899639,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.49051523208618164,
"max": 0.35575586557388306,
"mean": 8.911330223781988e-05,
"std": 0.04069535806775093,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.2970781326293945,
"max": 1.745163917541504,
"mean": -0.021079789847135544,
"std": 0.500128984451294,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.2181096374988556,
"max": 0.1974443644285202,
"mean": -4.0170674765249714e-05,
"std": 0.03423338383436203,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.041142482310533524,
"max": 0.03885917738080025,
"mean": -0.0001360031747026369,
"std": 0.012883774936199188,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.17761866748332977,
"max": 0.1828862875699997,
"mean": 4.801471368409693e-05,
"std": 0.03155674412846565,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.1799207329750061,
"max": 0.18389682471752167,
"mean": -0.0022146617993712425,
"std": 0.05482979863882065,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.474190354347229,
"max": 1.0258487462997437,
"mean": 0.6452326774597168,
"std": 0.05035318806767464,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.27163514494895935,
"max": 0.3091295659542084,
"mean": 0.00011244519555475563,
"std": 0.04068158566951752,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.10526852309703827,
"max": 0.026741184294223785,
"mean": -0.029519207775592804,
"std": 0.01793486438691616,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.33932313323020935,
"max": 0.329169899225235,
"mean": 5.2667885029222816e-05,
"std": 0.03441279008984566,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.18180307745933533,
"max": 0.042509548366069794,
"mean": -0.0010597179643809795,
"std": 0.017209293320775032,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.32517459988594055,
"max": 0.6865665912628174,
"mean": 0.511164128780365,
"std": 0.03695276752114296,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.23393133282661438,
"max": 0.2253761738538742,
"mean": -3.613880107877776e-05,
"std": 0.039175428450107574,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.11511484533548355,
"max": 0.13181191682815552,
"mean": 0.00015029555652290583,
"std": 0.029160132631659508,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.35229772329330444,
"max": 0.28487107157707214,
"mean": 6.5603690018178895e-06,
"std": 0.03924452140927315,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.132349967956543,
"max": 3.543774366378784,
"mean": -0.011590607464313507,
"std": 0.6826151609420776,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.21073584258556366,
"max": 0.20936711132526398,
"mean": 3.4690663596848026e-05,
"std": 0.03448447957634926,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.03585724160075188,
"max": 0.047966208308935165,
"mean": 0.0007884915685281157,
"std": 0.012871142476797104,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.21028311550617218,
"max": 0.19305972754955292,
"mean": -9.823215805226937e-07,
"std": 0.031695324927568436,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.1864088624715805,
"max": 0.17721442878246307,
"mean": -0.0028417375870049,
"std": 0.058615218847990036,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.47462186217308044,
"max": 1.0414687395095825,
"mean": 0.651329517364502,
"std": 0.049656689167022705,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.24834677577018738,
"max": 0.3290989398956299,
"mean": 0.00018076221749652177,
"std": 0.04056994616985321,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.12541481852531433,
"max": 0.024957137182354927,
"mean": -0.030498644337058067,
"std": 0.017614001408219337,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.4203978180885315,
"max": 0.4814401865005493,
"mean": 1.1958536560996436e-06,
"std": 0.03539701923727989,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.15133719146251678,
"max": 0.04343123733997345,
"mean": 4.256972897564992e-05,
"std": 0.014886128716170788,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.31556373834609985,
"max": 0.6816186308860779,
"mean": 0.5528932809829712,
"std": 0.04069383069872856,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.20636020600795746,
"max": 0.21985411643981934,
"mean": 3.188779010088183e-05,
"std": 0.03829942271113396,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.13772568106651306,
"max": 0.1125853881239891,
"mean": 2.6155808882322162e-05,
"std": 0.025809435173869133,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.40282922983169556,
"max": 0.37083154916763306,
"mean": 2.5528193873469718e-05,
"std": 0.03817952424287796,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.7708845138549805,
"max": 2.868703603744507,
"mean": 0.0011554225347936153,
"std": 0.5168288946151733,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.20372195541858673,
"max": 0.1975945085287094,
"mean": 2.9724978958256543e-05,
"std": 0.03429732471704483,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.0505308173596859,
"max": 0.039880186319351196,
"mean": -0.0004213028587400913,
"std": 0.01341495756059885,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.19602739810943604,
"max": 0.20172414183616638,
"mean": -1.2448943380150013e-05,
"std": 0.031805410981178284,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.19294138252735138,
"max": 0.19508768618106842,
"mean": -0.0029671685770154,
"std": 0.06252522766590118,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.348909467458725,
"max": 1.083768367767334,
"mean": 0.667101263999939,
"std": 0.055243175476789474,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22561651468276978,
"max": 0.2514271140098572,
"mean": 0.0003585518861655146,
"std": 0.04075947403907776,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.09107004851102829,
"max": 0.04363898187875748,
"mean": -0.03007982112467289,
"std": 0.017611678689718246,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.353363573551178,
"max": 0.3039560914039612,
"mean": -4.4702926970785484e-05,
"std": 0.037122584879398346,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.16167114675045013,
"max": 0.06346774101257324,
"mean": -7.894223381299525e-05,
"std": 0.019427189603447914,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.34871092438697815,
"max": 0.7219411134719849,
"mean": 0.5423486828804016,
"std": 0.03906320407986641,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.219291090965271,
"max": 0.22339218854904175,
"mean": -1.1523573448357638e-05,
"std": 0.03923090174794197,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.118381567299366,
"max": 0.17055465281009674,
"mean": 0.00028248116723261774,
"std": 0.025117389857769012,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.24647831916809082,
"max": 0.30066463351249695,
"mean": -3.701161767821759e-05,
"std": 0.03893034905195236,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.5050106048583984,
"max": 3.714456796646118,
"mean": 0.015847081318497658,
"std": 0.7823866009712219,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.2191196233034134,
"max": 0.2373991161584854,
"mean": -1.3136124835000373e-05,
"std": 0.03630338981747627,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.04720474034547806,
"max": 0.051363855600357056,
"mean": 0.00048070820048451424,
"std": 0.013523152098059654,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.21417121589183807,
"max": 0.21722286939620972,
"mean": 5.63644825888332e-05,
"std": 0.0336158350110054,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.21132777631282806,
"max": 0.2312006652355194,
"mean": -0.0050989487208426,
"std": 0.06185900419950485,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.36193206906318665,
"max": 1.1010645627975464,
"mean": 0.6992560029029846,
"std": 0.05359357222914696,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.2351117730140686,
"max": 0.24475757777690887,
"mean": 0.00046337785897776484,
"std": 0.041268885135650635,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.09809085726737976,
"max": 0.06809623539447784,
"mean": -0.0314301960170269,
"std": 0.018128085881471634,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.30171892046928406,
"max": 0.35163986682891846,
"mean": -8.267226803582162e-05,
"std": 0.04027453064918518,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.1522630751132965,
"max": 0.14965395629405975,
"mean": 0.0002633024996612221,
"std": 0.023038938641548157,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 0.9992594122886658,
"max": 1.0015419721603394,
"mean": 1.0000762939453125,
"std": 0.0006376681849360466,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.03125917166471481,
"max": 0.03125542029738426,
"mean": -1.929077916429378e-05,
"std": 0.018040984869003296,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.031228363513946533,
"max": 0.030987966805696487,
"mean": -0.0010841633193194866,
"std": 0.017950600013136864,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.03125608712434769,
"max": 0.03125986456871033,
"mean": 3.548163931554882e-06,
"std": 0.018041392788290977,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.03115428239107132,
"max": 0.031174642965197563,
"mean": 0.00033392058685421944,
"std": 0.01806280016899109,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": -0.0006233988679014146,
"max": 0.0007061311043798923,
"mean": 4.538033408607589e-06,
"std": 0.0001893796434160322,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 0.997599720954895,
"max": 1.002988576889038,
"mean": 0.9999969601631165,
"std": 0.000850954616907984,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.033545054495334625,
"max": 0.033692505210638046,
"mean": -6.091411705710925e-06,
"std": 0.018047811463475227,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.033063653856515884,
"max": 0.033412136137485504,
"mean": -0.00018106887000612915,
"std": 0.017954090610146523,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": -0.001468250178731978,
"max": 0.0015634398441761732,
"mean": 1.9080666788795497e-06,
"std": 0.00028948785620741546,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": -0.0005752606084570289,
"max": 0.0007690406637266278,
"mean": 7.6006986091670115e-06,
"std": 0.00017151834617834538,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.3833079934120178,
"max": 0.7191449403762817,
"mean": 0.5806841254234314,
"std": 0.03885476291179657,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.23893281817436218,
"max": 0.19658899307250977,
"mean": 2.609232979011722e-05,
"std": 0.03746626526117325,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.11880965530872345,
"max": 0.1667701154947281,
"mean": 0.000981115852482617,
"std": 0.02755648083984852,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.2465641349554062,
"max": 0.49993160367012024,
"mean": -5.0439630285836756e-05,
"std": 0.03762364014983177,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.9418535232543945,
"max": 3.7689952850341797,
"mean": -0.003572138026356697,
"std": 0.6813418865203857,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.2274625599384308,
"max": 0.25183549523353577,
"mean": -1.1858754987770226e-05,
"std": 0.03743482381105423,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.07157625257968903,
"max": 0.08059139549732208,
"mean": -0.0005097019020467997,
"std": 0.0156550370156765,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.22814570367336273,
"max": 0.2576799690723419,
"mean": -2.8758266125805676e-05,
"std": 0.03542165458202362,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.20052188634872437,
"max": 0.21483485400676727,
"mean": -0.0055272276513278484,
"std": 0.06832942366600037,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.40502721071243286,
"max": 1.189380407333374,
"mean": 0.7378897666931152,
"std": 0.05522923544049263,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.22088685631752014,
"max": 0.2456110566854477,
"mean": 0.0005211912211962044,
"std": 0.04133584350347519,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.10322928428649902,
"max": 0.024186961352825165,
"mean": -0.03266708552837372,
"std": 0.018890798091888428,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.44966569542884827,
"max": 0.42246878147125244,
"mean": -0.00043506931979209185,
"std": 0.04689610004425049,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.2515268921852112,
"max": 0.47013524174690247,
"mean": 0.003204584587365389,
"std": 0.04452726989984512,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.31688186526298523,
"max": 0.33314481377601624,
"mean": -2.5167657440761104e-05,
"std": 0.02128784917294979,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.3244757652282715,
"max": 0.6856456398963928,
"mean": 0.5710105299949646,
"std": 0.044706691056489944,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.16456718742847443,
"max": 0.17448973655700684,
"mean": -4.871570490649901e-05,
"std": 0.03318251296877861,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.18692979216575623,
"max": 0.14325818419456482,
"mean": 3.459470462985337e-05,
"std": 0.029701216146349907,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.38104259967803955,
"max": 0.2459549903869629,
"mean": -9.848581612459384e-06,
"std": 0.03276371210813522,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.655487537384033,
"max": 3.2897744178771973,
"mean": -0.01425144076347351,
"std": 0.985081136226654,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.23475398123264313,
"max": 0.24735963344573975,
"mean": -1.814730239857454e-05,
"std": 0.041698258370161057,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.07251452654600143,
"max": 0.15445762872695923,
"mean": 0.0006656228797510266,
"std": 0.0251647736877203,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.26630881428718567,
"max": 0.2481267750263214,
"mean": -1.5170076949289069e-05,
"std": 0.0401393324136734,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.1895921230316162,
"max": 0.19462409615516663,
"mean": -0.001237674499861896,
"std": 0.06668463349342346,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.32920053601264954,
"max": 0.999627411365509,
"mean": 0.7191565632820129,
"std": 0.052332233637571335,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.23170752823352814,
"max": 0.24531398713588715,
"mean": 0.00018265214748680592,
"std": 0.040900230407714844,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11451739817857742,
"max": 0.019039874896407127,
"mean": -0.0424770824611187,
"std": 0.018864724785089493,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.38964730501174927,
"max": 0.40745288133621216,
"mean": -2.1833995560882613e-05,
"std": 0.0485333576798439,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.6929526925086975,
"max": 0.4126836955547333,
"mean": 0.0008477572700940073,
"std": 0.060282669961452484,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": -0.0013933395966887474,
"max": 1.000746726989746,
"mean": 0.00048820103984326124,
"std": 0.022089513018727303,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 0.9992843866348267,
"max": 1.001552939414978,
"mean": 1.0000746250152588,
"std": 0.0006248687277548015,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.03125389292836189,
"max": 0.03125779330730438,
"mean": -2.1020408894401044e-05,
"std": 0.01803232543170452,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.031215354800224304,
"max": 0.031232187524437904,
"mean": -0.0006770011968910694,
"std": 0.017826862633228302,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.03125695139169693,
"max": 0.03126237541437149,
"mean": -8.831485502014402e-06,
"std": 0.018031351268291473,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.03123210370540619,
"max": 0.03124479576945305,
"mean": -0.0007297537522390485,
"std": 0.017941787838935852,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": -0.0005147741758264601,
"max": 0.00041916739428415895,
"mean": -4.1531684473739006e-06,
"std": 0.0001558788208058104,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 0.997329831123352,
"max": 1.0023579597473145,
"mean": 0.9995578527450562,
"std": 0.0008328193798661232,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.033257633447647095,
"max": 0.03283705189824104,
"mean": -2.9398686365311733e-06,
"std": 0.01802799478173256,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.0324481800198555,
"max": 0.03130009397864342,
"mean": -0.000511951744556427,
"std": 0.01803583651781082,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": -0.0017112370114773512,
"max": 0.0015153783606365323,
"mean": -1.2167475915703108e-06,
"std": 0.00028721734997816384,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": -0.00046955313882790506,
"max": 0.0003882118908222765,
"mean": -3.8059165490267333e-06,
"std": 0.00014281016774475574,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.23431308567523956,
"max": 0.2725020945072174,
"mean": 6.621908141823951e-06,
"std": 0.018810350447893143,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.32144924998283386,
"max": 0.6939579248428345,
"mean": 0.5816149711608887,
"std": 0.045937687158584595,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.18192073702812195,
"max": 0.1977624148130417,
"mean": -1.1576559700188227e-05,
"std": 0.03318417817354202,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.16049131751060486,
"max": 0.1293114274740219,
"mean": -0.00107291666790843,
"std": 0.03413516655564308,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.3323962688446045,
"max": 0.31116873025894165,
"mean": -1.0262579962727614e-05,
"std": 0.03223471716046333,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.802563190460205,
"max": 8.761749267578125,
"mean": 0.09345458447933197,
"std": 1.6194684505462646,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.23397405445575714,
"max": 0.2418195903301239,
"mean": 4.162176628597081e-05,
"std": 0.04085618257522583,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.07595669478178024,
"max": 0.0657576471567154,
"mean": 0.00048221880570054054,
"std": 0.019416553899645805,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.2459147870540619,
"max": 0.23389238119125366,
"mean": -3.2510670280316845e-06,
"std": 0.03943093866109848,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.1629837304353714,
"max": 0.16088047623634338,
"mean": 0.0016233830247074366,
"std": 0.06528986245393753,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.5571612119674683,
"max": 0.9436106085777283,
"mean": 0.7128171324729919,
"std": 0.04012364149093628,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.22801116108894348,
"max": 0.2548006474971771,
"mean": -4.5571337977889925e-05,
"std": 0.04057438299059868,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.13471974432468414,
"max": 0.0221097432076931,
"mean": -0.041352279484272,
"std": 0.01838749460875988,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.42162591218948364,
"max": 0.3923877477645874,
"mean": -4.321471351431683e-06,
"std": 0.04778357967734337,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.6071884632110596,
"max": 0.651282787322998,
"mean": 0.0015848546754568815,
"std": 0.0568372942507267,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.25181877613067627,
"max": 0.32084232568740845,
"mean": -6.161948476801626e-06,
"std": 0.019613562151789665,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.35955217480659485,
"max": 0.6821547150611877,
"mean": 0.5706839561462402,
"std": 0.0429888591170311,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.22016532719135284,
"max": 0.17702604830265045,
"mean": -3.4450480598025024e-05,
"std": 0.034298721700906754,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.1631413698196411,
"max": 0.23277200758457184,
"mean": 0.000363422412192449,
"std": 0.032813675701618195,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.2639073431491852,
"max": 0.2398279309272766,
"mean": -5.2961986511945724e-05,
"std": 0.033897411078214645,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.854308605194092,
"max": 5.090536117553711,
"mean": 0.04387902468442917,
"std": 1.2290979623794556,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.24643683433532715,
"max": 0.2503347098827362,
"mean": 7.216692029032856e-05,
"std": 0.04398633539676666,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.06248769536614418,
"max": 0.05441384017467499,
"mean": 0.0006457050913013518,
"std": 0.017188573256134987,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.2864288091659546,
"max": 0.2721114456653595,
"mean": -5.008514563087374e-05,
"std": 0.04298446327447891,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.16100545227527618,
"max": 0.170342355966568,
"mean": -0.0028870203532278538,
"std": 0.059300076216459274,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.5198097229003906,
"max": 0.9330063462257385,
"mean": 0.7133984565734863,
"std": 0.03842313215136528,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.23787352442741394,
"max": 0.24874305725097656,
"mean": 0.0004645891021937132,
"std": 0.04045315086841583,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.14499974250793457,
"max": 0.04109013453125954,
"mean": -0.039695803076028824,
"std": 0.020541805773973465,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.5323729515075684,
"max": 0.5824694633483887,
"mean": 5.902071279706433e-06,
"std": 0.04885893687605858,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5189845561981201,
"max": 0.4933343231678009,
"mean": 0.0023664908949285746,
"std": 0.05344504490494728,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.2737047076225281,
"max": 0.31558480858802795,
"mean": 1.935944737851969e-06,
"std": 0.020050112158060074,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.3658909797668457,
"max": 0.7117034196853638,
"mean": 0.5931328535079956,
"std": 0.04596179351210594,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.2108193188905716,
"max": 0.1990451216697693,
"mean": 3.062548057641834e-05,
"std": 0.034867268055677414,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.18712614476680756,
"max": 0.20343470573425293,
"mean": 0.0009520579478703439,
"std": 0.031497176736593246,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.2896800935268402,
"max": 0.3398098945617676,
"mean": -4.6883709728717804e-05,
"std": 0.03458770364522934,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.8768599033355713,
"max": 3.3869552612304688,
"mean": 0.014455841854214668,
"std": 0.8583106398582458,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.22448983788490295,
"max": 0.24981370568275452,
"mean": -3.890434527420439e-06,
"std": 0.042229313403367996,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.05526347830891609,
"max": 0.046524014323949814,
"mean": -2.1809362806379795e-05,
"std": 0.01583988219499588,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.2933104932308197,
"max": 0.29035091400146484,
"mean": -7.618443305545952e-06,
"std": 0.04194440320134163,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.124831423163414,
"max": 0.25899115204811096,
"mean": -0.0032436971087008715,
"std": 0.05317322164773941,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.45623838901519775,
"max": 0.844422459602356,
"mean": 0.7054718732833862,
"std": 0.03522763401269913,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.5120505094528198,
"max": 0.3482021689414978,
"mean": 0.00034296896774321795,
"std": 0.04019856080412865,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.18573501706123352,
"max": 0.03954247012734413,
"mean": -0.039387013763189316,
"std": 0.02136080153286457,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.543980062007904,
"max": 0.5556398034095764,
"mean": -7.12752080289647e-05,
"std": 0.050733935087919235,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.5116539001464844,
"max": 0.6641847491264343,
"mean": 0.0024422036949545145,
"std": 0.049520041793584824,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.3325117230415344,
"max": 0.2653426229953766,
"mean": 3.3086610073951306e-06,
"std": 0.019387137144804,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.3219893276691437,
"max": 0.7664631009101868,
"mean": 0.6510411500930786,
"std": 0.04532777890563011,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.2498156577348709,
"max": 0.2198626697063446,
"mean": -1.886132849904243e-06,
"std": 0.03650164604187012,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.32695695757865906,
"max": 0.2867416441440582,
"mean": -0.000684951723087579,
"std": 0.03855687379837036,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.31001296639442444,
"max": 0.3700636327266693,
"mean": 6.516962457681075e-05,
"std": 0.036242250353097916,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.716774940490723,
"max": 5.807016372680664,
"mean": 0.03795425221323967,
"std": 1.4130064249038696,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.22152067720890045,
"max": 0.20586349070072174,
"mean": -7.513246237067506e-05,
"std": 0.042484886944293976,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.0776548758149147,
"max": 0.05150791257619858,
"mean": -0.0009258093778043985,
"std": 0.016412504017353058,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.33054521679878235,
"max": 0.32925283908843994,
"mean": -4.675353011407424e-06,
"std": 0.042791180312633514,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.284753680229187,
"max": 0.1120273545384407,
"mean": -0.0012038055574521422,
"std": 0.04701421782374382,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.4860539734363556,
"max": 0.8868206739425659,
"mean": 0.7373669743537903,
"std": 0.03824283927679062,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.362324595451355,
"max": 0.27455514669418335,
"mean": 5.109608173370361e-05,
"std": 0.04064401239156723,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.24754445254802704,
"max": 0.046375077217817307,
"mean": -0.039263028651475906,
"std": 0.02328905090689659,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.6261394023895264,
"max": 0.5965179204940796,
"mean": -5.992384103592485e-05,
"std": 0.053116101771593094,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.7094439268112183,
"max": 0.2657933533191681,
"mean": 0.000917100696824491,
"std": 0.05122515559196472,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.3433791399002075,
"max": 0.30369648337364197,
"mean": 2.4011274035729e-07,
"std": 0.019135721027851105,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.34975123405456543,
"max": 0.7829355597496033,
"mean": 0.6388096809387207,
"std": 0.049248941242694855,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.20544706284999847,
"max": 0.20679640769958496,
"mean": -5.99185805185698e-05,
"std": 0.037696123123168945,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.2586185336112976,
"max": 0.2680370807647705,
"mean": -0.00040146420360542834,
"std": 0.04459588602185249,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.3540765345096588,
"max": 0.3223837912082672,
"mean": -6.969309197302209e-06,
"std": 0.03720474615693092,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.260976791381836,
"max": 4.204005241394043,
"mean": -0.026412418112158775,
"std": 1.0066431760787964,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.23861557245254517,
"max": 0.24334679543972015,
"mean": -2.5082641514018178e-05,
"std": 0.04320957139134407,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.06232341378927231,
"max": 0.056674133986234665,
"mean": 0.0003426429466344416,
"std": 0.01415110845118761,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.43692541122436523,
"max": 0.37342891097068787,
"mean": 1.4435072444030084e-05,
"std": 0.04412085935473442,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.09643299132585526,
"max": 0.17589901387691498,
"mean": -0.0006592142744921148,
"std": 0.03515716642141342,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.4216461777687073,
"max": 1.0694262981414795,
"mean": 0.7483195662498474,
"std": 0.04205932468175888,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.2665816843509674,
"max": 0.2969212532043457,
"mean": -7.953966996865347e-05,
"std": 0.04080412909388542,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.1857525259256363,
"max": 0.043901920318603516,
"mean": -0.036818623542785645,
"std": 0.025608688592910767,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.4569249451160431,
"max": 0.4865773022174835,
"mean": 4.3881707824766636e-05,
"std": 0.05420896038413048,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.28651300072669983,
"max": 0.5512722134590149,
"mean": -0.00088057282846421,
"std": 0.04782658815383911,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.292865514755249,
"max": 0.32280707359313965,
"mean": 6.539526111737359e-06,
"std": 0.019969915971159935,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.2909410893917084,
"max": 0.7601442337036133,
"mean": 0.6508233547210693,
"std": 0.05213604494929314,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.2434738278388977,
"max": 0.2616451680660248,
"mean": -6.040764219505945e-06,
"std": 0.03961297869682312,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.2675459682941437,
"max": 0.1998538225889206,
"mean": -0.0008808721322566271,
"std": 0.05175367370247841,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.2721429765224457,
"max": 0.25373363494873047,
"mean": 4.028795956401154e-06,
"std": 0.03871006891131401,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -12.963708877563477,
"max": 15.945626258850098,
"mean": 0.03322511166334152,
"std": 1.988985300064087,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.2071155309677124,
"max": 0.22583135962486267,
"mean": -7.227471360238269e-05,
"std": 0.04055366292595863,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.06934842467308044,
"max": 0.06322810798883438,
"mean": 0.00015266213449649513,
"std": 0.01474202610552311,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.46502697467803955,
"max": 0.32068270444869995,
"mean": 1.9500737835187465e-05,
"std": 0.0405886135995388,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.06406750530004501,
"max": 0.1152099147439003,
"mean": 0.0011921885889023542,
"std": 0.0247051939368248,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.37462663650512695,
"max": 0.9322708249092102,
"mean": 0.7508515119552612,
"std": 0.040188200771808624,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.27930018305778503,
"max": 0.2731732130050659,
"mean": -0.00016858182789292186,
"std": 0.040994688868522644,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.19882012903690338,
"max": 0.05084774270653725,
"mean": -0.03202420845627785,
"std": 0.025111209601163864,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.6573402285575867,
"max": 0.5352922677993774,
"mean": -4.871936471317895e-05,
"std": 0.05284557491540909,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.1931021511554718,
"max": 0.5820591449737549,
"mean": -0.0005149454809725285,
"std": 0.04106936603784561,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.4177095592021942,
"max": 0.37194108963012695,
"mean": 6.037503226252738e-06,
"std": 0.021621696650981903,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.21426498889923096,
"max": 0.7471067905426025,
"mean": 0.6495591998100281,
"std": 0.05437273159623146,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.20954997837543488,
"max": 0.19577716290950775,
"mean": 4.0040544263320044e-05,
"std": 0.03946496173739433,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.3292751908302307,
"max": 0.25935792922973633,
"mean": -0.003224420826882124,
"std": 0.05625506490468979,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.2056337594985962,
"max": 0.25471389293670654,
"mean": 5.435157800093293e-05,
"std": 0.038567062467336655,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.24283504486084,
"max": 6.9316864013671875,
"mean": 0.048334453254938126,
"std": 1.3849503993988037,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.20960700511932373,
"max": 0.23016247153282166,
"mean": -5.2383575166459195e-06,
"std": 0.04131292924284935,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.043877486139535904,
"max": 0.035942550748586655,
"mean": 4.677800461649895e-06,
"std": 0.012800506316125393,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.39784368872642517,
"max": 0.3448275029659271,
"mean": -5.554455128731206e-05,
"std": 0.04238935187458992,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.05505242943763733,
"max": 0.06286512315273285,
"mean": 0.0003699597145896405,
"std": 0.018672524020075798,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.3501029312610626,
"max": 1.0451030731201172,
"mean": 0.7893401980400085,
"std": 0.04874471575021744,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.3334510326385498,
"max": 0.38586220145225525,
"mean": -0.0001694880920695141,
"std": 0.041480448096990585,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.15723954141139984,
"max": 0.05913884937763214,
"mean": -0.031833715736866,
"std": 0.025140652433037758,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6964147090911865,
"max": 0.4686952233314514,
"mean": -9.150124969892204e-05,
"std": 0.05179166793823242,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.24826228618621826,
"max": 0.32854214310646057,
"mean": -0.00024761329405009747,
"std": 0.0414327047765255,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.2872432768344879,
"max": 0.35023465752601624,
"mean": -2.1361338440328836e-06,
"std": 0.024239059537649155,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.19656625390052795,
"max": 0.7792166471481323,
"mean": 0.6702941060066223,
"std": 0.058692529797554016,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.22861525416374207,
"max": 0.23119905591011047,
"mean": -1.981826062547043e-05,
"std": 0.04044099524617195,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.21965257823467255,
"max": 0.24067652225494385,
"mean": 0.0007787347421981394,
"std": 0.05579977110028267,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.215622216463089,
"max": 0.22666674852371216,
"mean": -7.155455386964604e-05,
"std": 0.03937716409564018,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.904394149780273,
"max": 9.067266464233398,
"mean": -0.001250309869647026,
"std": 1.8481073379516602,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.2693168520927429,
"max": 0.25895655155181885,
"mean": 4.356484714662656e-05,
"std": 0.038407694548368454,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.05762461572885513,
"max": 0.057689178735017776,
"mean": 0.00034963880898430943,
"std": 0.014724270440638065,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.2649986743927002,
"max": 0.28868991136550903,
"mean": -6.175809539854527e-05,
"std": 0.039074063301086426,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.043768905103206635,
"max": 0.0373171903192997,
"mean": -8.572106889914721e-05,
"std": 0.013365655206143856,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.3394976556301117,
"max": 1.0926626920700073,
"mean": 0.86370849609375,
"std": 0.06385412812232971,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.42326879501342773,
"max": 0.419196218252182,
"mean": 0.00031274266075342894,
"std": 0.043502915650606155,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.21476341784000397,
"max": 0.17061911523342133,
"mean": -0.029481371864676476,
"std": 0.031948987394571304,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.5996708869934082,
"max": 0.5596612691879272,
"mean": -0.00015256566985044628,
"std": 0.053446218371391296,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17847125232219696,
"max": 0.3766724169254303,
"mean": 0.0013643248239532113,
"std": 0.037309642881155014,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.39427170157432556,
"max": 0.3689534664154053,
"mean": 3.643418676801957e-05,
"std": 0.028621334582567215,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.2903065085411072,
"max": 0.826573371887207,
"mean": 0.7055738568305969,
"std": 0.06789194792509079,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.9261522889137268,
"max": 1.0264601707458496,
"mean": -2.5637811631895602e-05,
"std": 0.047625649720430374,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.8783160448074341,
"max": 0.8149734735488892,
"mean": -0.00031416097772307694,
"std": 0.09553803503513336,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.2693849802017212,
"max": 0.24096263945102692,
"mean": -2.2922044081497006e-05,
"std": 0.03895637020468712,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.73985481262207,
"max": 22.84831428527832,
"mean": -0.09187203645706177,
"std": 4.069868564605713,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.227765753865242,
"max": 0.24508675932884216,
"mean": -2.5811230443650857e-05,
"std": 0.03863935545086861,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.06041998043656349,
"max": 0.046056248247623444,
"mean": -0.00014605963951908052,
"std": 0.014698919840157032,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.33846333622932434,
"max": 0.3745211064815521,
"mean": 7.246726454468444e-06,
"std": 0.04081542044878006,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.0464671291410923,
"max": 0.1957084834575653,
"mean": 0.0002726902603171766,
"std": 0.013569602742791176,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.3744957149028778,
"max": 1.1300216913223267,
"mean": 0.8900200724601746,
"std": 0.06398579478263855,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.4477945864200592,
"max": 0.5424723625183105,
"mean": 2.4591532564954832e-05,
"std": 0.04556761309504509,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.22407397627830505,
"max": 0.08826831728219986,
"mean": -0.03201541677117348,
"std": 0.03776346147060394,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7253258228302002,
"max": 0.6892617344856262,
"mean": 3.4524080547271296e-05,
"std": 0.05177822336554527,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.1745493859052658,
"max": 0.21855643391609192,
"mean": 4.002213245257735e-05,
"std": 0.0317784883081913,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.3402628004550934,
"max": 0.37424033880233765,
"mean": 4.292904486646876e-05,
"std": 0.03414493426680565,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.3175790011882782,
"max": 1.2868926525115967,
"mean": 0.6014685034751892,
"std": 0.0834617167711258,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.28334787487983704,
"max": 0.26021766662597656,
"mean": -3.078439021919621e-06,
"std": 0.03598484769463539,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.23551659286022186,
"max": 0.20537099242210388,
"mean": 0.0002320160565432161,
"std": 0.056010857224464417,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.4354335069656372,
"max": 0.3252001106739044,
"mean": 2.4517319616279565e-05,
"std": 0.03413575515151024,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.544912338256836,
"max": 7.312640190124512,
"mean": -0.007366480305790901,
"std": 0.6992346048355103,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.343842089176178,
"max": 0.36349090933799744,
"mean": 0.0001033815206028521,
"std": 0.04782803729176521,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.07375385612249374,
"max": 0.06036338210105896,
"mean": 0.0009326335857622325,
"std": 0.014949528500437737,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.25554072856903076,
"max": 0.28654900193214417,
"mean": 4.4343978515826166e-06,
"std": 0.041555255651474,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.05532766133546829,
"max": 0.06282689422369003,
"mean": 0.00014148413902148604,
"std": 0.007174154743552208,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.49368223547935486,
"max": 1.2208430767059326,
"mean": 1.0134273767471313,
"std": 0.11743992567062378,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.0936156511306763,
"max": 1.0469433069229126,
"mean": -4.977267235517502e-05,
"std": 0.05241084843873978,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.22367312014102936,
"max": 0.17280347645282745,
"mean": -0.02724579907953739,
"std": 0.03635029122233391,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8845533132553101,
"max": 0.9224876165390015,
"mean": -0.000146063175634481,
"std": 0.053282301872968674,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.17102308571338654,
"max": 0.37991419434547424,
"mean": 0.003368670353665948,
"std": 0.03989797830581665,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.7772527933120728,
"max": 0.7234945297241211,
"mean": 1.913893902383279e-05,
"std": 0.04616517201066017,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.3385581970214844,
"max": 1.4277539253234863,
"mean": 0.9483213424682617,
"std": 0.20673882961273193,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.7455896139144897,
"max": 1.7045435905456543,
"mean": 0.00022695818915963173,
"std": 0.15868604183197021,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.199622631072998,
"max": 1.099592685699463,
"mean": -0.00953536294400692,
"std": 0.203833669424057,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.4213031232357025,
"max": 0.42637819051742554,
"mean": 6.450257205870003e-05,
"std": 0.048018429428339005,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.743934631347656,
"max": 19.539039611816406,
"mean": -0.24830012023448944,
"std": 4.776192665100098,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.32387086749076843,
"max": 0.4384032189846039,
"mean": -1.2015252650598995e-05,
"std": 0.046161383390426636,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.0340605154633522,
"max": 0.037125036120414734,
"mean": 0.0006421188591048121,
"std": 0.012921434827148914,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.703487753868103,
"max": 0.6645694375038147,
"mean": 4.3493168050190434e-05,
"std": 0.0578836165368557,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.0722307413816452,
"max": 0.06750312447547913,
"mean": -0.00013278273399919271,
"std": 0.012919807806611061,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.3801887333393097,
"max": 1.3909631967544556,
"mean": 1.0665581226348877,
"std": 0.2197146713733673,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.6164069175720215,
"max": 0.7170259952545166,
"mean": 0.00011130621714983135,
"std": 0.058021292090415955,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.21958374977111816,
"max": 0.2251792550086975,
"mean": 0.0062429094687104225,
"std": 0.04972800984978676,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.6296579241752625,
"max": 0.8892135620117188,
"mean": 1.1699157766997814e-05,
"std": 0.023528022691607475,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.5068321824073792,
"max": 0.4739873707294464,
"mean": -0.003016006201505661,
"std": 0.06930257380008698,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.5377203226089478,
"max": 1.1807109117507935,
"mean": 0.7827430367469788,
"std": 0.09885811805725098,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.2669532299041748,
"max": 0.2126723825931549,
"mean": -0.00022305321181192994,
"std": 0.05399656668305397,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23791296780109406,
"max": 0.014832733199000359,
"mean": -0.04395970329642296,
"std": 0.03433232381939888,
"sparsity": 0.0,
"shape": [
100
]
}
}
}