mr6 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
53f5f19 verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.43005406856536865,
"max": 0.29851898550987244,
"mean": -0.0025509949773550034,
"std": 0.042555101215839386,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.06313250213861465,
"max": 0.10729768127202988,
"mean": 0.0006133262650109828,
"std": 0.03408696502447128,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.41268208622932434,
"max": 0.8365541696548462,
"mean": -0.00020702443725895137,
"std": 0.02410811372101307,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.11502047628164291,
"max": 0.3207014203071594,
"mean": -0.00093841488705948,
"std": 0.019534854218363762,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.7852821350097656,
"max": 2.8634164333343506,
"mean": -0.00036539402208290994,
"std": 0.615379810333252,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.27854230999946594,
"max": 0.38152772188186646,
"mean": 0.0004230512131471187,
"std": 0.042748332023620605,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.22163018584251404,
"max": 0.20894938707351685,
"mean": -0.004489985294640064,
"std": 0.040880318731069565,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.4279509484767914,
"max": 0.47543206810951233,
"mean": 3.1694014523964142e-06,
"std": 0.02450772561132908,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.32420721650123596,
"max": 0.15700779855251312,
"mean": -0.04670684412121773,
"std": 0.051544804126024246,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.4101617932319641,
"max": 0.3544142544269562,
"mean": -0.00012779857206624,
"std": 0.02359919063746929,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.2289954274892807,
"max": 0.26173391938209534,
"mean": -0.029131349176168442,
"std": 0.04930002987384796,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.25456100702285767,
"max": 0.818419873714447,
"mean": 0.5253804922103882,
"std": 0.08069705218076706,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.2965428829193115,
"max": 0.26520034670829773,
"mean": -0.00042467008461244404,
"std": 0.03210080415010452,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.09260489046573639,
"max": 0.1250484734773636,
"mean": 0.0006493350956588984,
"std": 0.025727085769176483,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.2901724576950073,
"max": 0.281167596578598,
"mean": -7.525501860072836e-05,
"std": 0.030932163819670677,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.8939008712768555,
"max": 5.80875825881958,
"mean": -0.009307368658483028,
"std": 1.2948225736618042,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.4246821701526642,
"max": 0.34353208541870117,
"mean": 9.80871482170187e-05,
"std": 0.029952067881822586,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.02886669710278511,
"max": 0.027609167620539665,
"mean": -0.0003159984771627933,
"std": 0.01256631314754486,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.4538891911506653,
"max": 0.4482215344905853,
"mean": 2.2922111384104937e-05,
"std": 0.02385348081588745,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.08867117762565613,
"max": 0.09104129672050476,
"mean": 0.0022725451271981,
"std": 0.019507737830281258,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.26674631237983704,
"max": 1.054079532623291,
"mean": 0.5310790538787842,
"std": 0.10425138473510742,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.5743944644927979,
"max": 0.6082407832145691,
"mean": -0.00042930786730721593,
"std": 0.03859541565179825,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.18188051879405975,
"max": 0.04570186883211136,
"mean": -0.029450394213199615,
"std": 0.04259800165891647,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.1662050485610962,
"max": 1.6339434385299683,
"mean": 0.00032052083406597376,
"std": 0.027692945674061775,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.16221286356449127,
"max": 0.2055274099111557,
"mean": -0.021118517965078354,
"std": 0.027932317927479744,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.22425268590450287,
"max": 0.8419703841209412,
"mean": 0.48751628398895264,
"std": 0.0750974491238594,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.2551511526107788,
"max": 0.30577754974365234,
"mean": -8.399176294915378e-06,
"std": 0.03346917778253555,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.09521990269422531,
"max": 0.11036473512649536,
"mean": 6.435990508180112e-05,
"std": 0.026954451575875282,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.2969436049461365,
"max": 0.29559123516082764,
"mean": 5.0998860388062894e-05,
"std": 0.032539013773202896,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.159433841705322,
"max": 5.079733371734619,
"mean": -0.014565235003829002,
"std": 1.156693696975708,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.3445141315460205,
"max": 0.3432990610599518,
"mean": 7.890153938205913e-05,
"std": 0.03005831316113472,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.03612125664949417,
"max": 0.03314004838466644,
"mean": -0.00014305136573966593,
"std": 0.013020108453929424,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.3150654435157776,
"max": 0.3748987019062042,
"mean": -2.0872395907645114e-05,
"std": 0.02405514195561409,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.10531895607709885,
"max": 0.12192098051309586,
"mean": -0.0019657753873616457,
"std": 0.028842739760875702,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.3119339942932129,
"max": 1.1190955638885498,
"mean": 0.6662184000015259,
"std": 0.09769617766141891,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.8722184300422668,
"max": 0.6274752616882324,
"mean": 0.0016759471036493778,
"std": 0.047436658293008804,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.27076128125190735,
"max": 0.034267961978912354,
"mean": -0.046592649072408676,
"std": 0.040578801184892654,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.9206072688102722,
"max": 0.96403568983078,
"mean": 0.0010221146512776613,
"std": 0.040701672434806824,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.14442752301692963,
"max": 0.0748896598815918,
"mean": -0.009088763035833836,
"std": 0.02569626271724701,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.23972344398498535,
"max": 0.7111932635307312,
"mean": 0.44715946912765503,
"std": 0.05921364948153496,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.27250099182128906,
"max": 0.297283798456192,
"mean": 8.777939001447521e-06,
"std": 0.03547067567706108,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.11882907897233963,
"max": 0.1182771623134613,
"mean": 0.0007498766062781215,
"std": 0.027608048170804977,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.2806638181209564,
"max": 0.27924486994743347,
"mean": -7.666053716093302e-05,
"std": 0.03510000556707382,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.5072221755981445,
"max": 2.5192060470581055,
"mean": 0.026715079322457314,
"std": 0.586592435836792,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.22091323137283325,
"max": 0.2714807987213135,
"mean": 2.762420081126038e-06,
"std": 0.030731365084648132,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.03329985961318016,
"max": 0.031178824603557587,
"mean": 0.00011736361193470657,
"std": 0.012398799881339073,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.2350921630859375,
"max": 0.23149597644805908,
"mean": 5.688454257324338e-05,
"std": 0.025696979835629463,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.13562175631523132,
"max": 0.1278066188097,
"mean": -0.00549966748803854,
"std": 0.039964329451322556,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.3545263111591339,
"max": 1.1705567836761475,
"mean": 0.7105071544647217,
"std": 0.10373809188604355,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.6171801686286926,
"max": 0.5549061298370361,
"mean": 0.0011606733314692974,
"std": 0.04611368104815483,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.1888936311006546,
"max": 0.024856731295585632,
"mean": -0.034840360283851624,
"std": 0.028601042926311493,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.1303929090499878,
"max": 0.9700294137001038,
"mean": 0.00035928928991779685,
"std": 0.04234178736805916,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.5973078012466431,
"max": 0.06291170418262482,
"mean": -0.004878643434494734,
"std": 0.028604039922356606,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.3753509521484375,
"max": 0.9391864538192749,
"mean": 0.5924164056777954,
"std": 0.06680406630039215,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.391277939081192,
"max": 0.36899876594543457,
"mean": 7.035685848677531e-05,
"std": 0.03718537837266922,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11886083334684372,
"max": 0.1363811194896698,
"mean": 0.0009265001863241196,
"std": 0.029201578348875046,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.6185654401779175,
"max": 0.5083082914352417,
"mean": 1.5324059859267436e-05,
"std": 0.0364382304251194,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.179115295410156,
"max": 8.780653953552246,
"mean": -0.10920821875333786,
"std": 1.697803258895874,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.27624833583831787,
"max": 0.23940874636173248,
"mean": 5.239578240434639e-05,
"std": 0.0326123982667923,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.05171733349561691,
"max": 0.039454903453588486,
"mean": 9.008367487695068e-05,
"std": 0.012963240966200829,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.2306506633758545,
"max": 0.23440538346767426,
"mean": -2.216407301602885e-05,
"std": 0.02938910946249962,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.2041204422712326,
"max": 0.1051875501871109,
"mean": -0.004020026419311762,
"std": 0.03262867406010628,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.3396590054035187,
"max": 1.0105489492416382,
"mean": 0.7007004022598267,
"std": 0.0967300534248352,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5642524361610413,
"max": 0.8327149152755737,
"mean": 0.0004152198671363294,
"std": 0.04229423776268959,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.21180973947048187,
"max": 0.030382230877876282,
"mean": -0.032180484384298325,
"std": 0.02649112045764923,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7539102435112,
"max": 0.7183676958084106,
"mean": -1.6375699487980455e-05,
"std": 0.03683510050177574,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.26317542791366577,
"max": 0.10612691938877106,
"mean": -0.003012202214449644,
"std": 0.028860073536634445,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.28410062193870544,
"max": 0.6937515735626221,
"mean": 0.49938827753067017,
"std": 0.04646085575222969,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.27815356850624084,
"max": 0.233821839094162,
"mean": -0.00011090396583313122,
"std": 0.03875657916069031,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.15374495089054108,
"max": 0.126325324177742,
"mean": -0.0022300099954009056,
"std": 0.033342309296131134,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.4138854146003723,
"max": 0.6591927409172058,
"mean": -1.8888074919232167e-05,
"std": 0.03909528627991676,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.2339067459106445,
"max": 4.718007564544678,
"mean": -0.020461430773139,
"std": 1.007363200187683,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.2449360489845276,
"max": 0.207246333360672,
"mean": 4.3898020521737635e-05,
"std": 0.033962249755859375,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.03454353287816048,
"max": 0.04481153190135956,
"mean": -1.8621416529640555e-05,
"std": 0.01263485848903656,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.20073898136615753,
"max": 0.20600160956382751,
"mean": -2.920800579886418e-05,
"std": 0.0310201458632946,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.1997092068195343,
"max": 0.11323567479848862,
"mean": -0.002894954290241003,
"std": 0.0345144160091877,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.36691704392433167,
"max": 1.0552048683166504,
"mean": 0.670504629611969,
"std": 0.06634049117565155,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.39792558550834656,
"max": 0.5017094612121582,
"mean": -3.8320780731737614e-05,
"std": 0.04113030061125755,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.12866847217082977,
"max": 0.026868799701333046,
"mean": -0.030530910938978195,
"std": 0.02187257632613182,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.4486997127532959,
"max": 0.4325278401374817,
"mean": 7.570705201942474e-05,
"std": 0.03489042818546295,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.26739102602005005,
"max": 0.07290376722812653,
"mean": -0.001090540667064488,
"std": 0.023126306012272835,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.28740835189819336,
"max": 0.6838006973266602,
"mean": 0.5244842767715454,
"std": 0.04748576506972313,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.22222448885440826,
"max": 0.22337274253368378,
"mean": 1.5597350284224376e-05,
"std": 0.038948558270931244,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.1362549066543579,
"max": 0.1092236116528511,
"mean": 0.00024021141871344298,
"std": 0.029209597036242485,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.37488552927970886,
"max": 0.43708565831184387,
"mean": -9.820145351113752e-06,
"std": 0.039285808801651,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.8422415256500244,
"max": 4.994611740112305,
"mean": 0.009733816608786583,
"std": 0.8449002504348755,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.22278591990470886,
"max": 0.21995313465595245,
"mean": -2.4143082555383444e-07,
"std": 0.03440921753644943,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.04355766996741295,
"max": 0.03580183535814285,
"mean": -0.0002584094472695142,
"std": 0.012078197672963142,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.21266809105873108,
"max": 0.18842695653438568,
"mean": -1.707848787191324e-05,
"std": 0.03153562918305397,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.18067854642868042,
"max": 0.12067519873380661,
"mean": -0.0023923253174871206,
"std": 0.04126231372356415,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.42283520102500916,
"max": 0.9399095773696899,
"mean": 0.6626414060592651,
"std": 0.056763265281915665,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.37058448791503906,
"max": 0.4756770133972168,
"mean": -8.219464507419616e-05,
"std": 0.040889278054237366,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.20835021138191223,
"max": 0.027245184406638145,
"mean": -0.03023524209856987,
"std": 0.02135040983557701,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.3404720425605774,
"max": 0.7332155108451843,
"mean": 8.202612661989406e-05,
"std": 0.03476588428020477,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.2399250864982605,
"max": 0.050362419337034225,
"mean": -0.0011862949468195438,
"std": 0.020457014441490173,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.306090772151947,
"max": 0.6522687077522278,
"mean": 0.5250887274742126,
"std": 0.0460890494287014,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.3040372133255005,
"max": 0.21722179651260376,
"mean": 7.015860319370404e-05,
"std": 0.0394948311150074,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.14904865622520447,
"max": 0.1309719830751419,
"mean": 0.0003389039193280041,
"std": 0.03043319098651409,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.2568168342113495,
"max": 0.20181529223918915,
"mean": 3.114001810899936e-05,
"std": 0.039484698325395584,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.3340678215026855,
"max": 2.373654365539551,
"mean": -0.026232335716485977,
"std": 0.4496069550514221,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.18832948803901672,
"max": 0.2102191150188446,
"mean": 3.7190951843513176e-05,
"std": 0.03479335457086563,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.03177480027079582,
"max": 0.03555988520383835,
"mean": -0.00019898739992640913,
"std": 0.012286651879549026,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.1882997751235962,
"max": 0.16997897624969482,
"mean": -6.833271618233994e-05,
"std": 0.03217003867030144,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.13938407599925995,
"max": 0.1373613476753235,
"mean": -0.0025095485616475344,
"std": 0.051287971436977386,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.4670821726322174,
"max": 0.9539185762405396,
"mean": 0.6688235998153687,
"std": 0.05267348513007164,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.3240530490875244,
"max": 0.30894580483436584,
"mean": -9.802424756344408e-07,
"std": 0.04094521328806877,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.12482603639364243,
"max": 0.025560826063156128,
"mean": -0.030691375955939293,
"std": 0.01981331594288349,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.4391370117664337,
"max": 0.4447336196899414,
"mean": 9.505114576313645e-05,
"std": 0.03511868044734001,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.22435998916625977,
"max": 0.051745057106018066,
"mean": -0.0011790611315518618,
"std": 0.018466567620635033,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.339127779006958,
"max": 0.7379522323608398,
"mean": 0.5586450695991516,
"std": 0.041346412152051926,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.27276721596717834,
"max": 0.2783542275428772,
"mean": 2.0316545487730764e-05,
"std": 0.04105677455663681,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.13677620887756348,
"max": 0.13981792330741882,
"mean": 0.0004895473830401897,
"std": 0.026616644114255905,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.4901849925518036,
"max": 0.3555382788181305,
"mean": 8.898908708943054e-05,
"std": 0.04069453105330467,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.2957122325897217,
"max": 1.7441315650939941,
"mean": -0.02107611857354641,
"std": 0.5000779628753662,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.2175905406475067,
"max": 0.19755098223686218,
"mean": -4.055129102198407e-05,
"std": 0.03423253819346428,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.041273877024650574,
"max": 0.038862332701683044,
"mean": -0.0001397906889906153,
"std": 0.012886369600892067,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.17747005820274353,
"max": 0.1828984022140503,
"mean": 4.791315950569697e-05,
"std": 0.03155587986111641,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.17983144521713257,
"max": 0.1835365742444992,
"mean": -0.0022142226807773113,
"std": 0.054839469492435455,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.4742608368396759,
"max": 1.0234043598175049,
"mean": 0.645187497138977,
"std": 0.050187092274427414,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.2714308202266693,
"max": 0.3094487190246582,
"mean": 0.00011228019138798118,
"std": 0.04068155214190483,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.1052371934056282,
"max": 0.026651456952095032,
"mean": -0.029516855254769325,
"std": 0.017926618456840515,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.33875298500061035,
"max": 0.3289111852645874,
"mean": 5.248367233434692e-05,
"std": 0.03441265597939491,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.1814928501844406,
"max": 0.04225185513496399,
"mean": -0.0010585930431261659,
"std": 0.017206743359565735,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.325328528881073,
"max": 0.6851887106895447,
"mean": 0.5111891627311707,
"std": 0.03689680993556976,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.2336086481809616,
"max": 0.2251969277858734,
"mean": -3.625164390541613e-05,
"std": 0.039176031947135925,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.11540839821100235,
"max": 0.13177232444286346,
"mean": 0.00015377491945400834,
"std": 0.029171116650104523,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.35232973098754883,
"max": 0.2849805951118469,
"mean": 6.946377197891707e-06,
"std": 0.0392446406185627,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.128444194793701,
"max": 3.5404324531555176,
"mean": -0.011580632999539375,
"std": 0.6822744011878967,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.21085655689239502,
"max": 0.20925314724445343,
"mean": 3.461689630057663e-05,
"std": 0.03448476642370224,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.03582029417157173,
"max": 0.0481770783662796,
"mean": 0.000791961036156863,
"std": 0.012865905649960041,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.2102348804473877,
"max": 0.19295428693294525,
"mean": -1.266141225642059e-06,
"std": 0.03169584646821022,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.18637949228286743,
"max": 0.17694726586341858,
"mean": -0.0028348618652671576,
"std": 0.058624111115932465,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.47455769777297974,
"max": 1.0399035215377808,
"mean": 0.6513059735298157,
"std": 0.049517374485731125,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.2480839341878891,
"max": 0.32886141538619995,
"mean": 0.00018076057313010097,
"std": 0.040569957345724106,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.12484849989414215,
"max": 0.024815550073981285,
"mean": -0.030500907450914383,
"std": 0.01760847680270672,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.42022550106048584,
"max": 0.4810453951358795,
"mean": -1.3774351828033105e-06,
"std": 0.03539680689573288,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.15139424800872803,
"max": 0.04337864741683006,
"mean": 4.9671380111249164e-05,
"std": 0.014884358271956444,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.3155671954154968,
"max": 0.6806262135505676,
"mean": 0.5528896450996399,
"std": 0.04069091007113457,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.20614612102508545,
"max": 0.2194698005914688,
"mean": 3.180014027748257e-05,
"std": 0.038299210369586945,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.13776730000972748,
"max": 0.11263402551412582,
"mean": 2.7509784558787942e-05,
"std": 0.02582019381225109,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.4022030830383301,
"max": 0.3703415095806122,
"mean": 2.5775392714422196e-05,
"std": 0.03817988187074661,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.767340898513794,
"max": 2.8659963607788086,
"mean": 0.0011514686048030853,
"std": 0.5165835022926331,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.20330490171909332,
"max": 0.1975128948688507,
"mean": 2.9661892767762765e-05,
"std": 0.03429696336388588,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.05067470669746399,
"max": 0.03985888883471489,
"mean": -0.0004201547708362341,
"std": 0.013416973873972893,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.19610381126403809,
"max": 0.20185545086860657,
"mean": -1.2482038982852828e-05,
"std": 0.031804922968149185,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.19282294809818268,
"max": 0.19485345482826233,
"mean": -0.0029612130019813776,
"std": 0.06253436952829361,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.3490590453147888,
"max": 1.081492304801941,
"mean": 0.6670613884925842,
"std": 0.05502287670969963,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22548414766788483,
"max": 0.2509278655052185,
"mean": 0.00035874126479029655,
"std": 0.04075963795185089,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.0911286398768425,
"max": 0.043736688792705536,
"mean": -0.03008149564266205,
"std": 0.017609886825084686,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.3527411222457886,
"max": 0.30355900526046753,
"mean": -4.3905802158406004e-05,
"std": 0.037122152745723724,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.16155573725700378,
"max": 0.06323426961898804,
"mean": -8.016945503186435e-05,
"std": 0.019409824162721634,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.34882256388664246,
"max": 0.7205829620361328,
"mean": 0.5423275232315063,
"std": 0.03903055191040039,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.21910026669502258,
"max": 0.2230084389448166,
"mean": -1.1230863492528442e-05,
"std": 0.03923042118549347,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.11831706017255783,
"max": 0.17028944194316864,
"mean": 0.0002854751655831933,
"std": 0.02510806918144226,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.24612674117088318,
"max": 0.3002479076385498,
"mean": -3.693345206556842e-05,
"std": 0.03892989829182625,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.501706838607788,
"max": 3.7109532356262207,
"mean": 0.015846284106373787,
"std": 0.7818700075149536,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.2186352014541626,
"max": 0.2372058928012848,
"mean": -1.3363219295570161e-05,
"std": 0.03630276769399643,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.04711708053946495,
"max": 0.05125221982598305,
"mean": 0.00047675782116129994,
"std": 0.013513283804059029,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.2137574851512909,
"max": 0.2170482724905014,
"mean": 5.6474542361684144e-05,
"std": 0.033615030348300934,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.21112386882305145,
"max": 0.23111283779144287,
"mean": -0.005101324524730444,
"std": 0.06186835095286369,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.36194419860839844,
"max": 1.0987720489501953,
"mean": 0.6991980671882629,
"std": 0.05339714512228966,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.23452329635620117,
"max": 0.24459832906723022,
"mean": 0.0004634420620277524,
"std": 0.041268572211265564,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.09795372933149338,
"max": 0.0681690126657486,
"mean": -0.031430941075086594,
"std": 0.018122123554348946,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.3014773726463318,
"max": 0.3510685861110687,
"mean": -8.210168743971735e-05,
"std": 0.04027429223060608,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.15211886167526245,
"max": 0.14952634274959564,
"mean": 0.0002581052831374109,
"std": 0.023030627518892288,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 0.9992543458938599,
"max": 1.000257968902588,
"mean": 0.9997284412384033,
"std": 0.00024261184444185346,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.031257662922143936,
"max": 0.03125471994280815,
"mean": -1.929123027366586e-05,
"std": 0.018041206523776054,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.03122766688466072,
"max": 0.030988017097115517,
"mean": -0.0010841797338798642,
"std": 0.01795079931616783,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.031254444271326065,
"max": 0.031258873641490936,
"mean": 3.5479256439430173e-06,
"std": 0.018041614443063736,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.031154906377196312,
"max": 0.03117496706545353,
"mean": 0.0003339025133755058,
"std": 0.018063001334667206,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": -0.0006141028716228902,
"max": 0.0004136512288823724,
"mean": 1.3743268709731638e-06,
"std": 0.0001376789587084204,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 0.9981284141540527,
"max": 1.001622200012207,
"mean": 0.9998474717140198,
"std": 0.0006079401355236769,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.032770540565252304,
"max": 0.032834719866514206,
"mean": -6.686397682642564e-06,
"std": 0.01804281771183014,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.032758843153715134,
"max": 0.03259320184588432,
"mean": -0.00013118298375047743,
"std": 0.017956331372261047,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": -0.001173654804006219,
"max": 0.0011514672078192234,
"mean": 3.6397079838934587e-07,
"std": 0.00021431130880955607,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": -0.0005246364744380116,
"max": 0.000398451229557395,
"mean": 2.265020839331555e-06,
"std": 0.0001267467887373641,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.38304001092910767,
"max": 0.717822790145874,
"mean": 0.5806512236595154,
"std": 0.03879348561167717,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.2381902039051056,
"max": 0.1962050199508667,
"mean": 2.6112733394256793e-05,
"std": 0.03746553510427475,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.11878937482833862,
"max": 0.16630207002162933,
"mean": 0.0009804379660636187,
"std": 0.027551008388400078,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.24597673118114471,
"max": 0.499647855758667,
"mean": -5.027425504522398e-05,
"std": 0.03762295842170715,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.9381461143493652,
"max": 3.7654519081115723,
"mean": -0.003569968044757843,
"std": 0.6810594201087952,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.22724951803684235,
"max": 0.25177428126335144,
"mean": -1.1575086318771355e-05,
"std": 0.037434518337249756,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.07160108536481857,
"max": 0.08055920898914337,
"mean": -0.0005123723531141877,
"std": 0.015660181641578674,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.22791653871536255,
"max": 0.25741860270500183,
"mean": -2.8733527869917452e-05,
"std": 0.035421404987573624,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.20038263499736786,
"max": 0.21485595405101776,
"mean": -0.005531632341444492,
"std": 0.06833721697330475,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.4051814377307892,
"max": 1.186793327331543,
"mean": 0.7378474473953247,
"std": 0.055015575140714645,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.2207704335451126,
"max": 0.24539422988891602,
"mean": 0.0005212163086980581,
"std": 0.04133594036102295,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.10323301702737808,
"max": 0.02423531748354435,
"mean": -0.03266426920890808,
"std": 0.018886635079979897,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.44897761940956116,
"max": 0.42180517315864563,
"mean": -0.0004341494059190154,
"std": 0.04689624160528183,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.25117069482803345,
"max": 0.46963006258010864,
"mean": 0.003201500279828906,
"std": 0.044517986476421356,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.3168058395385742,
"max": 0.3330129086971283,
"mean": -2.5202643882948905e-05,
"std": 0.021287493407726288,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.32449325919151306,
"max": 0.6839006543159485,
"mean": 0.5709657073020935,
"std": 0.04467146471142769,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.16424405574798584,
"max": 0.1741371899843216,
"mean": -4.883421570411883e-05,
"std": 0.033180903643369675,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.18656986951828003,
"max": 0.14275068044662476,
"mean": 4.2517087422311306e-05,
"std": 0.029676001518964767,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.3805179297924042,
"max": 0.24586445093154907,
"mean": -9.98385530692758e-06,
"std": 0.03276193141937256,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.6520333290100098,
"max": 3.2866697311401367,
"mean": -0.01423930749297142,
"std": 0.984977662563324,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.23466402292251587,
"max": 0.24725867807865143,
"mean": -1.800561039999593e-05,
"std": 0.04169729724526405,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.07259472459554672,
"max": 0.15434128046035767,
"mean": 0.0006652789888903499,
"std": 0.02516855113208294,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.2662595510482788,
"max": 0.24813267588615417,
"mean": -1.5347548469435424e-05,
"std": 0.04013809189200401,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.18939754366874695,
"max": 0.19454091787338257,
"mean": -0.0012339097447693348,
"std": 0.06667902320623398,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.32912713289260864,
"max": 0.9980567097663879,
"mean": 0.7191190719604492,
"std": 0.05222564935684204,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.23154447972774506,
"max": 0.2451959252357483,
"mean": 0.00018269156862515956,
"std": 0.04089995473623276,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11424808949232101,
"max": 0.01902252808213234,
"mean": -0.04247482866048813,
"std": 0.018848657608032227,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.3893679976463318,
"max": 0.4069530963897705,
"mean": -2.1458035917021334e-05,
"std": 0.04853350669145584,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.6924692392349243,
"max": 0.4121605455875397,
"mean": 0.0008477990049868822,
"std": 0.06026294827461243,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": -0.0010412124684080482,
"max": 1.00050687789917,
"mean": 0.00048820613301359117,
"std": 0.02208906039595604,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 0.9985182881355286,
"max": 1.000278115272522,
"mean": 0.9996296167373657,
"std": 0.0004832371196243912,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.03125324100255966,
"max": 0.03125615417957306,
"mean": -2.1021265638410114e-05,
"std": 0.01803254708647728,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.03121461719274521,
"max": 0.031231539323925972,
"mean": -0.0006769909523427486,
"std": 0.017827048897743225,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.03125639632344246,
"max": 0.031260956078767776,
"mean": -8.831522791297175e-06,
"std": 0.018031572923064232,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.03123198263347149,
"max": 0.031244853511452675,
"mean": -0.0007297562551684678,
"std": 0.017941949889063835,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": -0.0004176551883574575,
"max": 0.0003318839881103486,
"mean": -3.140859689665376e-06,
"std": 0.00011632459791144356,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 0.9979198575019836,
"max": 1.0014318227767944,
"mean": 0.9994964599609375,
"std": 0.0006108160014264286,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.03245294839143753,
"max": 0.032378438860177994,
"mean": -1.7318175196123775e-06,
"std": 0.018028022721409798,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.03213566541671753,
"max": 0.03115900792181492,
"mean": -0.0003739359090104699,
"std": 0.018043629825115204,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": -0.0012771300971508026,
"max": 0.0011123745935037732,
"mean": -8.958944022197102e-07,
"std": 0.00020973320351913571,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": -0.00034164811950176954,
"max": 0.0002967154432553798,
"mean": -3.7618522128468612e-06,
"std": 0.00010472961730556563,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.2341979742050171,
"max": 0.27227067947387695,
"mean": 6.760874839528697e-06,
"std": 0.01880943961441517,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.32133588194847107,
"max": 0.6926518678665161,
"mean": 0.5816141963005066,
"std": 0.04592034965753555,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.1816624104976654,
"max": 0.19737666845321655,
"mean": -1.1567326509975828e-05,
"std": 0.03318365663290024,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.16045045852661133,
"max": 0.12930794060230255,
"mean": -0.0010751842055469751,
"std": 0.03413202986121178,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.3320204019546509,
"max": 0.31095007061958313,
"mean": -1.016673104459187e-05,
"std": 0.032234374433755875,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.795230388641357,
"max": 8.753500938415527,
"mean": 0.09339793026447296,
"std": 1.6184653043746948,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.23359645903110504,
"max": 0.2416210174560547,
"mean": 4.149888991378248e-05,
"std": 0.04085618630051613,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.07583926618099213,
"max": 0.06566201150417328,
"mean": 0.0004832554841414094,
"std": 0.01940709352493286,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.24546822905540466,
"max": 0.23373769223690033,
"mean": -3.0527116905432194e-06,
"std": 0.03943083807826042,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.16301113367080688,
"max": 0.16089561581611633,
"mean": 0.0016276519745588303,
"std": 0.06527570635080338,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.556946873664856,
"max": 0.9415686726570129,
"mean": 0.7127838134765625,
"std": 0.03996752202510834,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.22765818238258362,
"max": 0.25477662682533264,
"mean": -4.5632557885255665e-05,
"std": 0.04057467356324196,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.1348292976617813,
"max": 0.022138668224215508,
"mean": -0.04134812578558922,
"std": 0.01838543266057968,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.42094686627388,
"max": 0.3921053111553192,
"mean": -4.4014304876327515e-06,
"std": 0.04778384044766426,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.6069029569625854,
"max": 0.6509266495704651,
"mean": 0.0015840512933209538,
"std": 0.05682184174656868,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.25153595209121704,
"max": 0.320549339056015,
"mean": -6.0848738030472305e-06,
"std": 0.019612807780504227,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.35961607098579407,
"max": 0.6813214421272278,
"mean": 0.570705771446228,
"std": 0.04296967759728432,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.22012382745742798,
"max": 0.17660681903362274,
"mean": -3.47153763868846e-05,
"std": 0.03429870679974556,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.1630830317735672,
"max": 0.23280400037765503,
"mean": 0.00036220261245034635,
"std": 0.03281139209866524,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.263581246137619,
"max": 0.23967352509498596,
"mean": -5.2856208640150726e-05,
"std": 0.03389754518866539,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.849710464477539,
"max": 5.085712909698486,
"mean": 0.043873172253370285,
"std": 1.2286995649337769,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.24600939452648163,
"max": 0.25006523728370667,
"mean": 7.234106305986643e-05,
"std": 0.04398686811327934,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.06254445016384125,
"max": 0.054417435079813004,
"mean": 0.0006422345177270472,
"std": 0.017186632379889488,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.28586557507514954,
"max": 0.2718929648399353,
"mean": -5.018173033022322e-05,
"std": 0.0429849736392498,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.1608622968196869,
"max": 0.17021305859088898,
"mean": -0.0028866538777947426,
"std": 0.05928993597626686,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.519731879234314,
"max": 0.9308202266693115,
"mean": 0.7133743166923523,
"std": 0.03828318044543266,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.23790688812732697,
"max": 0.24848711490631104,
"mean": 0.00046475647832266986,
"std": 0.04045366868376732,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.14495447278022766,
"max": 0.04111183062195778,
"mean": -0.039693139493465424,
"std": 0.020540453493595123,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.5317410826683044,
"max": 0.581489622592926,
"mean": 5.736372258979827e-06,
"std": 0.04885946586728096,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5184876322746277,
"max": 0.4928899109363556,
"mean": 0.002365314168855548,
"std": 0.05342720076441765,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.27367857098579407,
"max": 0.3154536187648773,
"mean": 2.0265892999304924e-06,
"std": 0.020049458369612694,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.36605367064476013,
"max": 0.7104601860046387,
"mean": 0.5931398272514343,
"std": 0.04595194756984711,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.21068720519542694,
"max": 0.19896060228347778,
"mean": 3.061807728954591e-05,
"std": 0.03486604616045952,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.18698948621749878,
"max": 0.20358456671237946,
"mean": 0.0009543596534058452,
"std": 0.03149386867880821,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.2894982397556305,
"max": 0.339619904756546,
"mean": -4.7122804971877486e-05,
"std": 0.034586917608976364,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.8732118606567383,
"max": 3.3837733268737793,
"mean": 0.014458216726779938,
"std": 0.8580982089042664,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.224315345287323,
"max": 0.24964982271194458,
"mean": -3.871130957122659e-06,
"std": 0.042229585349559784,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.055275678634643555,
"max": 0.04663092643022537,
"mean": -1.647317549213767e-05,
"std": 0.015846259891986847,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.2928326427936554,
"max": 0.29024964570999146,
"mean": -7.346136044361629e-06,
"std": 0.04194441810250282,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.12488731741905212,
"max": 0.2587108016014099,
"mean": -0.0032421478535979986,
"std": 0.05317580699920654,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.4563259780406952,
"max": 0.8424069881439209,
"mean": 0.7054323554039001,
"std": 0.03509839251637459,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.5117396712303162,
"max": 0.34794938564300537,
"mean": 0.00034281908301636577,
"std": 0.04019879177212715,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.1857415735721588,
"max": 0.03958635777235031,
"mean": -0.03938839212059975,
"std": 0.021348465234041214,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.5434486865997314,
"max": 0.5551662445068359,
"mean": -7.160313543863595e-05,
"std": 0.050734180957078934,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.5112110376358032,
"max": 0.6635048389434814,
"mean": 0.002443352248519659,
"std": 0.04949941858649254,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.3325079083442688,
"max": 0.2651371359825134,
"mean": 3.4327572393522132e-06,
"std": 0.019386671483516693,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.3219457268714905,
"max": 0.7650159597396851,
"mean": 0.6510248780250549,
"std": 0.04531543329358101,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.24919819831848145,
"max": 0.21938340365886688,
"mean": -2.0984125512768514e-06,
"std": 0.03650059178471565,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.32654333114624023,
"max": 0.2866538465023041,
"mean": -0.0006891752709634602,
"std": 0.03852362558245659,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.30977994203567505,
"max": 0.36965611577033997,
"mean": 6.506919453386217e-05,
"std": 0.03624110668897629,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.713971138000488,
"max": 5.803556442260742,
"mean": 0.03793709725141525,
"std": 1.412732481956482,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.22124992311000824,
"max": 0.20528917014598846,
"mean": -7.50878534745425e-05,
"std": 0.042485084384679794,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.07763200253248215,
"max": 0.05141681060194969,
"mean": -0.0009281833190470934,
"std": 0.01641252264380455,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.33066344261169434,
"max": 0.32909321784973145,
"mean": -4.5878937271481846e-06,
"std": 0.04279147461056709,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.2844299376010895,
"max": 0.1119050681591034,
"mean": -0.001205054228194058,
"std": 0.0470142662525177,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.48612144589424133,
"max": 0.8848820328712463,
"mean": 0.7373377084732056,
"std": 0.03814017400145531,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.36209484934806824,
"max": 0.2740732431411743,
"mean": 5.125169991515577e-05,
"std": 0.04064430668950081,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.2473653107881546,
"max": 0.046401649713516235,
"mean": -0.03926541656255722,
"std": 0.02327280305325985,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.6253157258033752,
"max": 0.5961773991584778,
"mean": -6.133734132163227e-05,
"std": 0.0531163364648819,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.7087676525115967,
"max": 0.2656005322933197,
"mean": 0.0009179539047181606,
"std": 0.05120791867375374,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.3432927131652832,
"max": 0.3036082684993744,
"mean": 1.7233912785741268e-07,
"std": 0.01913507841527462,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.34983396530151367,
"max": 0.78127521276474,
"mean": 0.6388033628463745,
"std": 0.04922258108854294,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.20482076704502106,
"max": 0.20643775165081024,
"mean": -5.993415470584296e-05,
"std": 0.037695497274398804,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.2582729458808899,
"max": 0.2677401304244995,
"mean": -0.0004000938788522035,
"std": 0.04457787051796913,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.3535555303096771,
"max": 0.3218846917152405,
"mean": -7.005222414591117e-06,
"std": 0.03720390424132347,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.2560133934021,
"max": 4.200046062469482,
"mean": -0.026399940252304077,
"std": 1.0062882900238037,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.2381831258535385,
"max": 0.24307270348072052,
"mean": -2.52762038144283e-05,
"std": 0.0432097353041172,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.0622570626437664,
"max": 0.05666593089699745,
"mean": 0.0003454152902122587,
"std": 0.014151728712022305,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.43709275126457214,
"max": 0.37350907921791077,
"mean": 1.4359582564793527e-05,
"std": 0.04412123188376427,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.09637399762868881,
"max": 0.17579396069049835,
"mean": -0.00066028768196702,
"std": 0.035156894475221634,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.4216686189174652,
"max": 1.067047357559204,
"mean": 0.7483223080635071,
"std": 0.04198553413152695,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.26631179451942444,
"max": 0.2965000867843628,
"mean": -7.944944081827998e-05,
"std": 0.040804266929626465,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.1849687099456787,
"max": 0.04366198182106018,
"mean": -0.03681465983390808,
"std": 0.025593994185328484,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.4571255147457123,
"max": 0.4859236776828766,
"mean": 4.341108797234483e-05,
"std": 0.05420951172709465,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.28613921999931335,
"max": 0.5508683919906616,
"mean": -0.0008792161825112998,
"std": 0.04781510680913925,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.2926841676235199,
"max": 0.3227182626724243,
"mean": 6.155195478640962e-06,
"std": 0.019968634471297264,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.29101473093032837,
"max": 0.7585480213165283,
"mean": 0.6508181095123291,
"std": 0.05212597921490669,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.24345757067203522,
"max": 0.2612913250923157,
"mean": -6.02660793447285e-06,
"std": 0.03961166366934776,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.2671639025211334,
"max": 0.19983193278312683,
"mean": -0.0008803074015304446,
"std": 0.05174032971262932,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.2718494236469269,
"max": 0.25337839126586914,
"mean": 4.495690518524498e-06,
"std": 0.0387086495757103,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -12.951557159423828,
"max": 15.930760383605957,
"mean": 0.03321323171257973,
"std": 1.9877210855484009,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.2069142907857895,
"max": 0.225667342543602,
"mean": -7.223337888717651e-05,
"std": 0.04055356606841087,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.06923694908618927,
"max": 0.06314270943403244,
"mean": 0.00015547810471616685,
"std": 0.0147401699796319,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.4649372100830078,
"max": 0.3204408884048462,
"mean": 1.968499054783024e-05,
"std": 0.04058866575360298,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.06409196555614471,
"max": 0.11513285338878632,
"mean": 0.0011910968460142612,
"std": 0.024711282923817635,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.374662309885025,
"max": 0.9300851821899414,
"mean": 0.7508615255355835,
"std": 0.04013195261359215,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.2791317403316498,
"max": 0.2725660502910614,
"mean": -0.00016837481234688312,
"std": 0.040994856506586075,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.1984652727842331,
"max": 0.05115879327058792,
"mean": -0.03202404826879501,
"std": 0.02509358339011669,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.6568311452865601,
"max": 0.5346067547798157,
"mean": -4.890329364570789e-05,
"std": 0.052846092730760574,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.19282352924346924,
"max": 0.5817168354988098,
"mean": -0.0005141475703567266,
"std": 0.04106360301375389,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.41765207052230835,
"max": 0.3718544840812683,
"mean": 6.159986696729902e-06,
"std": 0.02162080444395542,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.21428614854812622,
"max": 0.7470263838768005,
"mean": 0.6495206356048584,
"std": 0.05435969680547714,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.20919783413410187,
"max": 0.19538012146949768,
"mean": 4.023606743430719e-05,
"std": 0.03946175053715706,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.32906630635261536,
"max": 0.25917014479637146,
"mean": -0.003227022010833025,
"std": 0.05624230206012726,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.20558328926563263,
"max": 0.2543526589870453,
"mean": 5.4226169595494866e-05,
"std": 0.038564346730709076,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.239154815673828,
"max": 6.927591800689697,
"mean": 0.04829341918230057,
"std": 1.3845902681350708,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.20949970185756683,
"max": 0.22989487648010254,
"mean": -5.106569460622268e-06,
"std": 0.0413125716149807,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.04377944767475128,
"max": 0.035965293645858765,
"mean": 6.696500349789858e-07,
"std": 0.012799888849258423,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.39747685194015503,
"max": 0.3446802794933319,
"mean": -5.5516902648378164e-05,
"std": 0.0423889197409153,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.05503125116229057,
"max": 0.06271757930517197,
"mean": 0.00036430457839742303,
"std": 0.018672339618206024,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.35033905506134033,
"max": 1.0429264307022095,
"mean": 0.7893730998039246,
"std": 0.048677314072847366,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.3334490656852722,
"max": 0.38581615686416626,
"mean": -0.00016950252756942064,
"std": 0.0414799265563488,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.1571725308895111,
"max": 0.059094030410051346,
"mean": -0.031832072883844376,
"std": 0.025125639513134956,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6957246661186218,
"max": 0.4681403636932373,
"mean": -8.918362436816096e-05,
"std": 0.051792457699775696,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.24794545769691467,
"max": 0.32831111550331116,
"mean": -0.000254548795055598,
"std": 0.04142748937010765,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.286994069814682,
"max": 0.35009774565696716,
"mean": -2.1362816369219217e-06,
"std": 0.0242360457777977,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.1966284215450287,
"max": 0.7790648937225342,
"mean": 0.6702556014060974,
"std": 0.058683399111032486,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.22847537696361542,
"max": 0.23085317015647888,
"mean": -1.998914376599714e-05,
"std": 0.04043750837445259,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.2196640521287918,
"max": 0.2406841218471527,
"mean": 0.0007778428844176233,
"std": 0.05581061542034149,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.21546684205532074,
"max": 0.22625623643398285,
"mean": -7.170689787017182e-05,
"std": 0.039373625069856644,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.899069786071777,
"max": 9.061844825744629,
"mean": -0.0012379959225654602,
"std": 1.8475514650344849,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.2690274119377136,
"max": 0.2585972249507904,
"mean": 4.365673885331489e-05,
"std": 0.038405876606702805,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.05762965977191925,
"max": 0.057730112224817276,
"mean": 0.00035032647429034114,
"std": 0.014716975390911102,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.2643204629421234,
"max": 0.28830888867378235,
"mean": -6.177595059853047e-05,
"std": 0.03907199949026108,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.04382891580462456,
"max": 0.03727584704756737,
"mean": -8.995864482130855e-05,
"std": 0.013357071205973625,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.3394246995449066,
"max": 1.0903522968292236,
"mean": 0.8637199997901917,
"std": 0.06381762027740479,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.4231264889240265,
"max": 0.41881492733955383,
"mean": 0.00031262467382475734,
"std": 0.04350043460726738,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.21452167630195618,
"max": 0.1706276834011078,
"mean": -0.029481077566742897,
"std": 0.03191966935992241,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.5986213088035583,
"max": 0.5590333342552185,
"mean": -0.00015086884377524257,
"std": 0.05344516038894653,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17835262417793274,
"max": 0.3764508068561554,
"mean": 0.0013586997520178556,
"std": 0.03730103746056557,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.3942283093929291,
"max": 0.3688967823982239,
"mean": 3.6990095395594835e-05,
"std": 0.028617417439818382,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.2902565002441406,
"max": 0.8266182541847229,
"mean": 0.7055412530899048,
"std": 0.06787826120853424,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.9262580275535583,
"max": 1.0264337062835693,
"mean": -2.6147403332288377e-05,
"std": 0.04762481153011322,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.8780329823493958,
"max": 0.8147000074386597,
"mean": -0.0003064283519051969,
"std": 0.09549984335899353,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.2694474458694458,
"max": 0.2405342310667038,
"mean": -2.2794924007030204e-05,
"std": 0.03895170986652374,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.725736618041992,
"max": 22.834732055664062,
"mean": -0.09184679388999939,
"std": 4.068049430847168,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.22741694748401642,
"max": 0.2447165697813034,
"mean": -2.5723496946739033e-05,
"std": 0.03863721713423729,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.06024840846657753,
"max": 0.04582807794213295,
"mean": -0.00014292271225713193,
"std": 0.014692682772874832,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.337954580783844,
"max": 0.3742024004459381,
"mean": 7.330418156925589e-06,
"std": 0.04081300273537636,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.04640491306781769,
"max": 0.19541829824447632,
"mean": 0.00027370243333280087,
"std": 0.013559137471020222,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.3744112551212311,
"max": 1.1277745962142944,
"mean": 0.8900341987609863,
"std": 0.06396359950304031,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.4476565718650818,
"max": 0.5421170592308044,
"mean": 2.477337693562731e-05,
"std": 0.04556567594408989,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.2238994538784027,
"max": 0.0882241502404213,
"mean": -0.03201638162136078,
"std": 0.03775238245725632,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7243073582649231,
"max": 0.6882233619689941,
"mean": 3.4276417864020914e-05,
"std": 0.05177783966064453,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.17440874874591827,
"max": 0.2182954102754593,
"mean": 4.099373472854495e-05,
"std": 0.0317707397043705,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.33985471725463867,
"max": 0.3734351098537445,
"mean": 4.3027404899476096e-05,
"std": 0.03413975238800049,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.31756407022476196,
"max": 1.2844599485397339,
"mean": 0.6014232039451599,
"std": 0.08331646770238876,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.2830894887447357,
"max": 0.260119765996933,
"mean": -2.825315732479794e-06,
"std": 0.03598077595233917,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.23531799018383026,
"max": 0.20526045560836792,
"mean": 0.00023797567700967193,
"std": 0.05601158365607262,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.43513408303260803,
"max": 0.324799120426178,
"mean": 2.434128509776201e-05,
"std": 0.03413143381476402,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.539924144744873,
"max": 7.305825233459473,
"mean": -0.007350243628025055,
"std": 0.6986610889434814,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.3433971107006073,
"max": 0.36268630623817444,
"mean": 0.00010339625441702083,
"std": 0.047828007489442825,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.07370211184024811,
"max": 0.06033240258693695,
"mean": 0.0009340607211925089,
"std": 0.014942350797355175,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.2555631995201111,
"max": 0.28619974851608276,
"mean": 4.566820280160755e-06,
"std": 0.04155479371547699,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.05527225881814957,
"max": 0.0627666711807251,
"mean": 0.00013802105968352407,
"std": 0.0071632144972682,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.49384805560112,
"max": 1.2211062908172607,
"mean": 1.0134272575378418,
"std": 0.11744718253612518,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.093487024307251,
"max": 1.046884298324585,
"mean": -4.944120883010328e-05,
"std": 0.052408553659915924,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.22308824956417084,
"max": 0.17253872752189636,
"mean": -0.027238916605710983,
"std": 0.036325786262750626,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8834213018417358,
"max": 0.921511173248291,
"mean": -0.00014601324801333249,
"std": 0.05328161269426346,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.17091798782348633,
"max": 0.3795103430747986,
"mean": 0.0033677970059216022,
"std": 0.039878927171230316,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.7767993211746216,
"max": 0.7229223251342773,
"mean": 1.8964092305395752e-05,
"std": 0.04616083949804306,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.3385705351829529,
"max": 1.4257850646972656,
"mean": 0.948320209980011,
"std": 0.20674099028110504,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.7456356287002563,
"max": 1.7042957544326782,
"mean": 0.00022721664572600275,
"std": 0.1586850881576538,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.1983858346939087,
"max": 1.0988513231277466,
"mean": -0.009531477466225624,
"std": 0.20368283987045288,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.4208756983280182,
"max": 0.4265652298927307,
"mean": 6.4577761804685e-05,
"std": 0.0480157844722271,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.72553825378418,
"max": 19.520837783813477,
"mean": -0.2481747567653656,
"std": 4.772479057312012,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.32345694303512573,
"max": 0.4378505349159241,
"mean": -1.1984889169980306e-05,
"std": 0.04616131633520126,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.03403974324464798,
"max": 0.03704509884119034,
"mean": 0.0006423466256819665,
"std": 0.012919273227453232,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.7029122710227966,
"max": 0.6650063395500183,
"mean": 4.321677261032164e-05,
"std": 0.05788154527544975,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.07217518985271454,
"max": 0.06747341901063919,
"mean": -0.00013201506226323545,
"std": 0.012908914126455784,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.38026899099349976,
"max": 1.3915380239486694,
"mean": 1.0665700435638428,
"std": 0.2197078913450241,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.6161525845527649,
"max": 0.7168518304824829,
"mean": 0.00011199730215594172,
"std": 0.058020394295454025,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.21944588422775269,
"max": 0.22491848468780518,
"mean": 0.00621908949688077,
"std": 0.049715615808963776,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.6295903325080872,
"max": 0.8891246914863586,
"mean": 1.184111533802934e-05,
"std": 0.023527733981609344,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.5063257217407227,
"max": 0.4734645485877991,
"mean": -0.0030142185278236866,
"std": 0.06923094391822815,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.537803590297699,
"max": 1.1795684099197388,
"mean": 0.7827014327049255,
"std": 0.09878505766391754,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.2665232717990875,
"max": 0.21241135895252228,
"mean": -0.00022294482914730906,
"std": 0.05399605259299278,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23782978951931,
"max": 0.014834473840892315,
"mean": -0.04395260661840439,
"std": 0.034306950867176056,
"sparsity": 0.0,
"shape": [
100
]
}
}
}