bar0 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
d416b09 verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.43111443519592285,
"max": 0.2988463342189789,
"mean": -0.0025462331250309944,
"std": 0.04255734384059906,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.06311740726232529,
"max": 0.10821832716464996,
"mean": 0.0006233985768631101,
"std": 0.03409506380558014,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.41270628571510315,
"max": 0.8365904092788696,
"mean": -0.0002062078274320811,
"std": 0.024108584970235825,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.11594842374324799,
"max": 0.323304146528244,
"mean": -0.0009396584937348962,
"std": 0.019620178267359734,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.8046321868896484,
"max": 2.8845088481903076,
"mean": -0.00036305765388533473,
"std": 0.615403413772583,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.2803097069263458,
"max": 0.3821697235107422,
"mean": 0.0004250165948178619,
"std": 0.042748384177684784,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.22351907193660736,
"max": 0.21069680154323578,
"mean": -0.004498748108744621,
"std": 0.04097301885485649,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.4281409978866577,
"max": 0.47565823793411255,
"mean": 3.041478066734271e-06,
"std": 0.024508286267518997,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.32690364122390747,
"max": 0.15677706897258759,
"mean": -0.04671286791563034,
"std": 0.05161474645137787,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.41106897592544556,
"max": 0.3550392687320709,
"mean": -0.00012950549717061222,
"std": 0.023600473999977112,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.23076964914798737,
"max": 0.2638300061225891,
"mean": -0.029151970520615578,
"std": 0.049401458352804184,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.25456827878952026,
"max": 0.8219638466835022,
"mean": 0.525442898273468,
"std": 0.08086482435464859,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.2974269390106201,
"max": 0.26618602871894836,
"mean": -0.0004250289057381451,
"std": 0.0321008674800396,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.09282378107309341,
"max": 0.12510952353477478,
"mean": 0.0006503364420495927,
"std": 0.025732681155204773,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.29088306427001953,
"max": 0.28188201785087585,
"mean": -7.563710096292198e-05,
"std": 0.030931729823350906,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.909866809844971,
"max": 5.824496746063232,
"mean": -0.009385589510202408,
"std": 1.2966406345367432,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.4253852665424347,
"max": 0.34430131316185,
"mean": 9.75119328359142e-05,
"std": 0.02995217591524124,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.028903231024742126,
"max": 0.027659673243761063,
"mean": -0.00031527443206869066,
"std": 0.012571859173476696,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.45454347133636475,
"max": 0.44891107082366943,
"mean": 2.3480326490243897e-05,
"std": 0.023853568360209465,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.08878406882286072,
"max": 0.09124661237001419,
"mean": 0.002279076725244522,
"std": 0.019516194239258766,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.2667350471019745,
"max": 1.0590577125549316,
"mean": 0.5311722159385681,
"std": 0.10455667227506638,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.5753205418586731,
"max": 0.6092038154602051,
"mean": -0.0004317538405302912,
"std": 0.038596246391534805,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.18242540955543518,
"max": 0.04575135558843613,
"mean": -0.02945941686630249,
"std": 0.04261056333780289,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.167878270149231,
"max": 1.6351370811462402,
"mean": 0.00032057490898296237,
"std": 0.02769383229315281,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.1625949591398239,
"max": 0.2059435099363327,
"mean": -0.02112039364874363,
"std": 0.027941575273871422,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.22422762215137482,
"max": 0.8458681702613831,
"mean": 0.4875890910625458,
"std": 0.07528901100158691,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.2560153305530548,
"max": 0.3063727021217346,
"mean": -8.626433555036783e-06,
"std": 0.033470120280981064,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.09546571969985962,
"max": 0.11066073924303055,
"mean": 5.8840945712290704e-05,
"std": 0.026972563937306404,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.2978975474834442,
"max": 0.29693126678466797,
"mean": 5.199259248911403e-05,
"std": 0.03254008665680885,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.169106960296631,
"max": 5.089260578155518,
"mean": -0.014622640796005726,
"std": 1.1580101251602173,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.3452591896057129,
"max": 0.3437287509441376,
"mean": 7.87251628935337e-05,
"std": 0.030058259144425392,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.03609376400709152,
"max": 0.03314271569252014,
"mean": -0.00014089577598497272,
"std": 0.013021372258663177,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.3159167468547821,
"max": 0.37570273876190186,
"mean": -2.126370236510411e-05,
"std": 0.024055330082774162,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.10549593716859818,
"max": 0.1221165731549263,
"mean": -0.0019639446400105953,
"std": 0.028849009424448013,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.3116210103034973,
"max": 1.1235315799713135,
"mean": 0.6662613153457642,
"std": 0.09780054539442062,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.872847855091095,
"max": 0.6278241872787476,
"mean": 0.0016755674732849002,
"std": 0.047437313944101334,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.2716394066810608,
"max": 0.03413696587085724,
"mean": -0.0466003455221653,
"std": 0.04061445966362953,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.9222021102905273,
"max": 0.9650114178657532,
"mean": 0.0010224997531622648,
"std": 0.04070303216576576,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.14480018615722656,
"max": 0.07504245638847351,
"mean": -0.00909046083688736,
"std": 0.025704393163323402,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.23979389667510986,
"max": 0.7145018577575684,
"mean": 0.4472465217113495,
"std": 0.059433478862047195,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.2733098268508911,
"max": 0.2983761131763458,
"mean": 9.066419806913473e-06,
"std": 0.03547072410583496,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.11928554624319077,
"max": 0.11867407709360123,
"mean": 0.0007565614068880677,
"std": 0.02763325348496437,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.28173530101776123,
"max": 0.2804112136363983,
"mean": -7.68975296523422e-05,
"std": 0.03510041534900665,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.51193904876709,
"max": 2.5239455699920654,
"mean": 0.026779357343912125,
"std": 0.5869050621986389,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.2215055674314499,
"max": 0.2721182703971863,
"mean": 2.8998874768149108e-06,
"std": 0.030730824917554855,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.03334304690361023,
"max": 0.031320393085479736,
"mean": 0.00011074724898207933,
"std": 0.012403324246406555,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.23567309975624084,
"max": 0.2320062220096588,
"mean": 5.707715899916366e-05,
"std": 0.025695981457829475,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.13582320511341095,
"max": 0.1279149055480957,
"mean": -0.005496869329363108,
"std": 0.03996486961841583,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.3545507788658142,
"max": 1.1755321025848389,
"mean": 0.7105286121368408,
"std": 0.10380106419324875,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.618323802947998,
"max": 0.5557036995887756,
"mean": 0.0011603902094066143,
"std": 0.046115029603242874,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.18935386836528778,
"max": 0.024935415014624596,
"mean": -0.03484790399670601,
"std": 0.028624996542930603,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.1329621076583862,
"max": 0.9724080562591553,
"mean": 0.00035803488572128117,
"std": 0.042342979460954666,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.5985916256904602,
"max": 0.06294681131839752,
"mean": -0.0048767137341201305,
"std": 0.028625035658478737,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.37523797154426575,
"max": 0.9426477551460266,
"mean": 0.5925332903862,
"std": 0.06714636832475662,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.3922964930534363,
"max": 0.37001147866249084,
"mean": 7.055637979647145e-05,
"std": 0.03718561306595802,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11894690245389938,
"max": 0.13649211823940277,
"mean": 0.0009205802925862372,
"std": 0.029216548427939415,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.6203529834747314,
"max": 0.509852409362793,
"mean": 1.5258530766004696e-05,
"std": 0.03643907234072685,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.195601463317871,
"max": 8.798324584960938,
"mean": -0.10935366153717041,
"std": 1.6999714374542236,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.27709993720054626,
"max": 0.24029740691184998,
"mean": 5.252830669633113e-05,
"std": 0.032612841576337814,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.05198528617620468,
"max": 0.03960206359624863,
"mean": 8.789013372734189e-05,
"std": 0.012959298677742481,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.23129259049892426,
"max": 0.23536467552185059,
"mean": -2.1845989977009594e-05,
"std": 0.029389241710305214,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.2045416533946991,
"max": 0.10547658056020737,
"mean": -0.004024041350930929,
"std": 0.03263028338551521,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.33950191736221313,
"max": 1.0151382684707642,
"mean": 0.7007080316543579,
"std": 0.09671688079833984,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5657932162284851,
"max": 0.8349727988243103,
"mean": 0.00041512559982948005,
"std": 0.04229608178138733,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.21222105622291565,
"max": 0.030380746349692345,
"mean": -0.03218400478363037,
"std": 0.026512378826737404,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7566999793052673,
"max": 0.7205860018730164,
"mean": -1.3569264410762116e-05,
"std": 0.036836523562669754,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.2636493444442749,
"max": 0.10622138530015945,
"mean": -0.0030191433615982533,
"std": 0.0288657546043396,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.284244179725647,
"max": 0.6968931555747986,
"mean": 0.49943026900291443,
"std": 0.046561453491449356,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.27927035093307495,
"max": 0.23469851911067963,
"mean": -0.00011116769746877253,
"std": 0.038758207112550735,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.15420791506767273,
"max": 0.12671181559562683,
"mean": -0.002232905477285385,
"std": 0.03338504582643509,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.41528424620628357,
"max": 0.6604220271110535,
"mean": -1.9215509382775053e-05,
"std": 0.03909698873758316,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.243428707122803,
"max": 4.728596210479736,
"mean": -0.020457647740840912,
"std": 1.0080652236938477,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.24574802815914154,
"max": 0.20800377428531647,
"mean": 4.4111799070378765e-05,
"std": 0.0339629240334034,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.03446226194500923,
"max": 0.04489393159747124,
"mean": -1.5458615962415934e-05,
"std": 0.012629742734134197,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.2015937864780426,
"max": 0.20673099160194397,
"mean": -2.9244030884001404e-05,
"std": 0.03102072887122631,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.20010024309158325,
"max": 0.11358015239238739,
"mean": -0.0029013892635703087,
"std": 0.03451463207602501,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.36685705184936523,
"max": 1.0600172281265259,
"mean": 0.6705178022384644,
"std": 0.06640052795410156,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.39914920926094055,
"max": 0.5031230449676514,
"mean": -3.865663893520832e-05,
"std": 0.04113178327679634,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.12865233421325684,
"max": 0.026885882019996643,
"mean": -0.030540671199560165,
"std": 0.02188955619931221,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.4503399133682251,
"max": 0.4341718554496765,
"mean": 7.837524026399478e-05,
"std": 0.03489154577255249,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.2677534520626068,
"max": 0.07295451313257217,
"mean": -0.0010977284982800484,
"std": 0.023126663640141487,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.28732216358184814,
"max": 0.687613844871521,
"mean": 0.5245327353477478,
"std": 0.047577910125255585,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.22290916740894318,
"max": 0.22416770458221436,
"mean": 1.5896670447546057e-05,
"std": 0.03894934430718422,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.13659609854221344,
"max": 0.10938586294651031,
"mean": 0.0002443990088067949,
"std": 0.029240434989333153,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.37579256296157837,
"max": 0.43812817335128784,
"mean": -9.537441655993462e-06,
"std": 0.03928641602396965,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.8499395847320557,
"max": 5.004647254943848,
"mean": 0.009758757427334785,
"std": 0.8455180525779724,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.2236318439245224,
"max": 0.22071507573127747,
"mean": -4.0232407627627254e-07,
"std": 0.034410055726766586,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.04383794590830803,
"max": 0.03584868088364601,
"mean": -0.00026072480250149965,
"std": 0.012076611630618572,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.21360361576080322,
"max": 0.1891404688358307,
"mean": -1.7133981600636616e-05,
"std": 0.03153670206665993,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.18102218210697174,
"max": 0.12101027369499207,
"mean": -0.002398766577243805,
"std": 0.04126044735312462,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.422617107629776,
"max": 0.9454182982444763,
"mean": 0.6626853942871094,
"std": 0.05683305859565735,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.3716322183609009,
"max": 0.47696027159690857,
"mean": -8.185259503079578e-05,
"std": 0.040890805423259735,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.2088262289762497,
"max": 0.027207661420106888,
"mean": -0.03023664839565754,
"std": 0.021368583664298058,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.3415319621562958,
"max": 0.735925555229187,
"mean": 8.314158185385168e-05,
"std": 0.034767184406518936,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.24044273793697357,
"max": 0.05069386586546898,
"mean": -0.0011902841506525874,
"std": 0.020465629175305367,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.30604928731918335,
"max": 0.6555026769638062,
"mean": 0.5250788331031799,
"std": 0.04609908536076546,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.3050762414932251,
"max": 0.21783104538917542,
"mean": 6.997165473876521e-05,
"std": 0.039496470242738724,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.14947636425495148,
"max": 0.13131970167160034,
"mean": 0.00033609665115363896,
"std": 0.03047223575413227,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.2578710615634918,
"max": 0.20255950093269348,
"mean": 3.1238341762218624e-05,
"std": 0.03948673978447914,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.339573621749878,
"max": 2.379251480102539,
"mean": -0.02625335566699505,
"std": 0.4500052034854889,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.1892782300710678,
"max": 0.21099112927913666,
"mean": 3.7314141081878915e-05,
"std": 0.03479423746466637,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.03169188275933266,
"max": 0.03571836277842522,
"mean": -0.00019686334417201579,
"std": 0.012292133644223213,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.1888986974954605,
"max": 0.17091436684131622,
"mean": -6.82127574691549e-05,
"std": 0.032170820981264114,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.13952063024044037,
"max": 0.13709284365177155,
"mean": -0.0025128263514488935,
"std": 0.0512898713350296,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.4670536518096924,
"max": 0.9585899710655212,
"mean": 0.6689007878303528,
"std": 0.05285040661692619,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.3248884379863739,
"max": 0.3098326325416565,
"mean": -1.0356043276260607e-06,
"std": 0.04094681516289711,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.12497521936893463,
"max": 0.02554607018828392,
"mean": -0.030699055641889572,
"std": 0.019824611023068428,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.4409962594509125,
"max": 0.44632241129875183,
"mean": 9.430450154468417e-05,
"std": 0.03512001410126686,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.22476668655872345,
"max": 0.051897041499614716,
"mean": -0.0011790284188464284,
"std": 0.018472088500857353,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.3393557369709015,
"max": 0.7416696548461914,
"mean": 0.5586937069892883,
"std": 0.04142747446894646,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.2734062075614929,
"max": 0.2793632745742798,
"mean": 2.0294006390031427e-05,
"std": 0.04105808213353157,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.13707204163074493,
"max": 0.14009879529476166,
"mean": 0.0004904167726635933,
"std": 0.02664206363260746,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.49139103293418884,
"max": 0.35644298791885376,
"mean": 8.893347694538534e-05,
"std": 0.04069600626826286,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.2994801998138428,
"max": 1.7469841241836548,
"mean": -0.021084124222397804,
"std": 0.500186562538147,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.2184700220823288,
"max": 0.1981830596923828,
"mean": -4.060107676195912e-05,
"std": 0.03423382714390755,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.04127173125743866,
"max": 0.03881501033902168,
"mean": -0.00013771075464319438,
"std": 0.012880227528512478,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.17825232446193695,
"max": 0.18374156951904297,
"mean": 4.785084456671029e-05,
"std": 0.031557004898786545,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.18023589253425598,
"max": 0.18417657911777496,
"mean": -0.002215688582509756,
"std": 0.05483615770936012,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.4742925763130188,
"max": 1.0284452438354492,
"mean": 0.6453101634979248,
"std": 0.05053440108895302,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.27223968505859375,
"max": 0.30990350246429443,
"mean": 0.00011251836258452386,
"std": 0.04068317264318466,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.10583628714084625,
"max": 0.02672600746154785,
"mean": -0.02951621636748314,
"std": 0.01793462224304676,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.339975506067276,
"max": 0.3303821086883545,
"mean": 5.460641114041209e-05,
"std": 0.034413956105709076,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.1819038987159729,
"max": 0.0424266941845417,
"mean": -0.0010654201032593846,
"std": 0.01721329055726528,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.3252944052219391,
"max": 0.688383936882019,
"mean": 0.5112100839614868,
"std": 0.036942265927791595,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.2345394641160965,
"max": 0.22607795894145966,
"mean": -3.624632518040016e-05,
"std": 0.039177343249320984,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.11556069552898407,
"max": 0.13209758698940277,
"mean": 0.00015118884039111435,
"std": 0.029196659103035927,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.3532617390155792,
"max": 0.2856779992580414,
"mean": 7.000558980507776e-06,
"std": 0.0392458438873291,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.136237621307373,
"max": 3.547076940536499,
"mean": -0.011597944423556328,
"std": 0.6828959584236145,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.21137045323848724,
"max": 0.20969942212104797,
"mean": 3.464317342150025e-05,
"std": 0.03448577970266342,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.03584721311926842,
"max": 0.048106979578733444,
"mean": 0.0007941541844047606,
"std": 0.012865344993770123,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.2109234631061554,
"max": 0.19350647926330566,
"mean": -1.076167109204107e-06,
"std": 0.03169678896665573,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.18694967031478882,
"max": 0.17746947705745697,
"mean": -0.002843617694452405,
"std": 0.0586174838244915,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.474641889333725,
"max": 1.0443058013916016,
"mean": 0.6514294147491455,
"std": 0.0498916432261467,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.24857543408870697,
"max": 0.3296365737915039,
"mean": 0.00018093036487698555,
"std": 0.040571410208940506,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.12483743578195572,
"max": 0.024654541164636612,
"mean": -0.030496058985590935,
"std": 0.01760769635438919,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.4221171438694,
"max": 0.4831203818321228,
"mean": 1.3900153135182336e-06,
"std": 0.03539836406707764,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.15169401466846466,
"max": 0.043601393699645996,
"mean": 4.186587466392666e-05,
"std": 0.014870981685817242,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.31570297479629517,
"max": 0.6836181879043579,
"mean": 0.5528991222381592,
"std": 0.04067207872867584,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.20685237646102905,
"max": 0.22020350396633148,
"mean": 3.1496565497945994e-05,
"std": 0.038300175219774246,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.13801881670951843,
"max": 0.1128397211432457,
"mean": 1.9543484086170793e-05,
"std": 0.02582789771258831,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.4035792350769043,
"max": 0.37189632654190063,
"mean": 2.57877072726842e-05,
"std": 0.03818116337060928,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.776683807373047,
"max": 2.873103380203247,
"mean": 0.0011591403745114803,
"std": 0.5172097086906433,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.20364898443222046,
"max": 0.19804270565509796,
"mean": 2.963895894936286e-05,
"std": 0.03429786115884781,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.05086854100227356,
"max": 0.03999151289463043,
"mean": -0.00042562291491776705,
"std": 0.01342119462788105,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.19656670093536377,
"max": 0.20230703055858612,
"mean": -1.2472472008084878e-05,
"std": 0.031806014478206635,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.19329077005386353,
"max": 0.1953459531068802,
"mean": -0.002963340375572443,
"std": 0.06254669278860092,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.3491152226924896,
"max": 1.0867162942886353,
"mean": 0.6672079563140869,
"std": 0.055482182651758194,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22604526579380035,
"max": 0.25199154019355774,
"mean": 0.00035888003185391426,
"std": 0.04076085984706879,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.09107685089111328,
"max": 0.043750207871198654,
"mean": -0.030080880969762802,
"std": 0.017612501978874207,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.354022353887558,
"max": 0.3047710955142975,
"mean": -4.505186007008888e-05,
"std": 0.03712347894906998,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.16208632290363312,
"max": 0.06347470730543137,
"mean": -7.683466537855566e-05,
"std": 0.01941368170082569,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.34881117939949036,
"max": 0.7244766354560852,
"mean": 0.5423683524131775,
"std": 0.039119552820920944,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.21985284984111786,
"max": 0.22366879880428314,
"mean": -1.1181864465470426e-05,
"std": 0.03923165425658226,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.11856226623058319,
"max": 0.17077098786830902,
"mean": 0.0002904185967054218,
"std": 0.025113951414823532,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.24732813239097595,
"max": 0.30149152874946594,
"mean": -3.663568713818677e-05,
"std": 0.03893101587891579,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.509943962097168,
"max": 3.719674825668335,
"mean": 0.015853645280003548,
"std": 0.7831405401229858,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.21940433979034424,
"max": 0.2380109429359436,
"mean": -1.3181561371311545e-05,
"std": 0.036304209381341934,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.04728918895125389,
"max": 0.05147355794906616,
"mean": 0.00047950932639651,
"std": 0.01351844146847725,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.21457946300506592,
"max": 0.21772831678390503,
"mean": 5.6543191021773964e-05,
"std": 0.03361648693680763,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.21175915002822876,
"max": 0.2316361367702484,
"mean": -0.005104508716613054,
"std": 0.06187352165579796,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.36198312044143677,
"max": 1.1043850183486938,
"mean": 0.6993494629859924,
"std": 0.0538649819791317,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.23541490733623505,
"max": 0.24545514583587646,
"mean": 0.0004635048389900476,
"std": 0.0412699356675148,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.09819761663675308,
"max": 0.06812109053134918,
"mean": -0.03143283352255821,
"std": 0.018124457448720932,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.302616149187088,
"max": 0.3526079058647156,
"mean": -8.239349699579179e-05,
"std": 0.04027572274208069,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.1525425761938095,
"max": 0.14988082647323608,
"mean": 0.00025950101553462446,
"std": 0.02303888648748398,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 0.9994731545448303,
"max": 1.0051331520080566,
"mean": 1.0006828308105469,
"std": 0.0018997839652001858,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.031253598630428314,
"max": 0.03125074878334999,
"mean": -1.9291795979370363e-05,
"std": 0.018041806295514107,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.031226053833961487,
"max": 0.030990969389677048,
"mean": -0.0010842140763998032,
"std": 0.01795150525867939,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.03125230595469475,
"max": 0.031255852431058884,
"mean": 3.5468428905005567e-06,
"std": 0.01804220862686634,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.031155752018094063,
"max": 0.031177222728729248,
"mean": 0.0003338717215228826,
"std": 0.018063681200146675,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": -0.00039401825051754713,
"max": 0.00042413949267938733,
"mean": 2.811485501297284e-06,
"std": 0.00013175072672311217,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 0.9984285831451416,
"max": 1.0057381391525269,
"mean": 1.0001252889633179,
"std": 0.0012227989500388503,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.03248310461640358,
"max": 0.03276699408888817,
"mean": -6.534818567160983e-06,
"std": 0.01804283820092678,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.03245115652680397,
"max": 0.032321732491254807,
"mean": -6.833355291746557e-05,
"std": 0.017962154000997543,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": -0.001046429155394435,
"max": 0.001021245145238936,
"mean": 1.2730889693557401e-06,
"std": 0.00019014839199371636,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": -0.00038878852501511574,
"max": 0.0004429140826687217,
"mean": 4.41432621300919e-06,
"std": 0.00012222054647281766,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.3831113874912262,
"max": 0.7217056155204773,
"mean": 0.5806930065155029,
"std": 0.03891616314649582,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.23930218815803528,
"max": 0.19694408774375916,
"mean": 2.6163981601712294e-05,
"std": 0.03746587410569191,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.11892960965633392,
"max": 0.16658687591552734,
"mean": 0.0009876482654362917,
"std": 0.027559131383895874,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.2469177097082138,
"max": 0.5011630058288574,
"mean": -5.039005191065371e-05,
"std": 0.037623330950737,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.9455182552337646,
"max": 3.7725064754486084,
"mean": -0.003572634421288967,
"std": 0.6815741658210754,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.2276747226715088,
"max": 0.25224873423576355,
"mean": -1.156590678874636e-05,
"std": 0.03743501380085945,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.0717209130525589,
"max": 0.08072538673877716,
"mean": -0.0005185012123547494,
"std": 0.01566058024764061,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.2281697541475296,
"max": 0.25840428471565247,
"mean": -2.8510152333183214e-05,
"std": 0.03542180359363556,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.2006748765707016,
"max": 0.21532072126865387,
"mean": -0.005526356864720583,
"std": 0.06832510232925415,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.4052578806877136,
"max": 1.1931043863296509,
"mean": 0.7380141019821167,
"std": 0.05553331598639488,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.2216469943523407,
"max": 0.24624952673912048,
"mean": 0.0005209938390180469,
"std": 0.04133738949894905,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.10345429182052612,
"max": 0.024157993495464325,
"mean": -0.03266732394695282,
"std": 0.018895410001277924,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.4506717622280121,
"max": 0.4234609603881836,
"mean": -0.00043505526264198124,
"std": 0.04689793288707733,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.2517058551311493,
"max": 0.4705328345298767,
"mean": 0.0032054544426500797,
"std": 0.044538334012031555,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.31723225116729736,
"max": 0.3334876596927643,
"mean": -2.5067403839784674e-05,
"std": 0.021288011223077774,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.32461482286453247,
"max": 0.6871254444122314,
"mean": 0.5709946155548096,
"std": 0.044712185859680176,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.16488447785377502,
"max": 0.174674391746521,
"mean": -4.878301842836663e-05,
"std": 0.033181823790073395,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.18708285689353943,
"max": 0.14329394698143005,
"mean": 4.1025952668860555e-05,
"std": 0.02970319241285324,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.3814561367034912,
"max": 0.2463892698287964,
"mean": -9.789278919924982e-06,
"std": 0.03276311233639717,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.6606388092041016,
"max": 3.2944271564483643,
"mean": -0.01427321694791317,
"std": 0.9851539731025696,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.23539957404136658,
"max": 0.2480521947145462,
"mean": -1.7979342374019325e-05,
"std": 0.04169878736138344,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.07279200851917267,
"max": 0.15470217168331146,
"mean": 0.0006656068144366145,
"std": 0.02517576329410076,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.2668735086917877,
"max": 0.2486240267753601,
"mean": -1.5421055650222115e-05,
"std": 0.04013972356915474,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.18993628025054932,
"max": 0.19500213861465454,
"mean": -0.0012349991593509912,
"std": 0.06668674200773239,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.32912659645080566,
"max": 1.003253698348999,
"mean": 0.7192496061325073,
"std": 0.052594345062971115,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.2322535365819931,
"max": 0.24589639902114868,
"mean": 0.00018273374007549137,
"std": 0.0409013107419014,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11447025835514069,
"max": 0.018959810957312584,
"mean": -0.04247897118330002,
"std": 0.018857870250940323,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.39094480872154236,
"max": 0.4085846245288849,
"mean": -2.156081063731108e-05,
"std": 0.0485350526869297,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.6941088438034058,
"max": 0.413074254989624,
"mean": 0.0008494330104440451,
"std": 0.060315798968076706,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": -0.0010608690790832043,
"max": 1.0004838705062866,
"mean": 0.0004881545901298523,
"std": 0.0220896415412426,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 0.9995023608207703,
"max": 1.004894495010376,
"mean": 1.0006191730499268,
"std": 0.0017806595424190164,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.031253229826688766,
"max": 0.0312533862888813,
"mean": -2.1022129658376798e-05,
"std": 0.018033137544989586,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.03121466003358364,
"max": 0.031230736523866653,
"mean": -0.0006770135369151831,
"std": 0.017827749252319336,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.031253378838300705,
"max": 0.03125477209687233,
"mean": -8.833090760163032e-06,
"std": 0.018032172694802284,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.031231535598635674,
"max": 0.031244806945323944,
"mean": -0.0007297678967006505,
"std": 0.01794254779815674,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": -0.00039897009264677763,
"max": 0.00031239030067808926,
"mean": -2.7656624297378585e-06,
"std": 0.00010500323696760461,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 0.9984675645828247,
"max": 1.005997896194458,
"mean": 0.9998568296432495,
"std": 0.0012546924408525229,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.032396964728832245,
"max": 0.032092805951833725,
"mean": -3.513969204504974e-08,
"std": 0.018030446022748947,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.03191046044230461,
"max": 0.03107621893286705,
"mean": -0.00026303951744921505,
"std": 0.018048185855150223,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": -0.0011175514664500952,
"max": 0.0010112477466464043,
"mean": -6.1762216319039e-07,
"std": 0.0001866686943685636,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": -0.0003427659103181213,
"max": 0.00032113981433212757,
"mean": -2.040310619122465e-06,
"std": 9.538298763800412e-05,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.23462186753749847,
"max": 0.27271148562431335,
"mean": 6.776777354389196e-06,
"std": 0.018810205161571503,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.32134121656417847,
"max": 0.696171224117279,
"mean": 0.5816354155540466,
"std": 0.045965857803821564,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.18210144340991974,
"max": 0.19822537899017334,
"mean": -1.1569689377211034e-05,
"std": 0.03318428248167038,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.16075287759304047,
"max": 0.1296185702085495,
"mean": -0.0010708055924624205,
"std": 0.03414905443787575,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.33257541060447693,
"max": 0.31164395809173584,
"mean": -1.0188834494329058e-05,
"std": 0.03223486989736557,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.815314769744873,
"max": 8.776156425476074,
"mean": 0.09355179965496063,
"std": 1.6212124824523926,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.2341691255569458,
"max": 0.2423291653394699,
"mean": 4.1637467802502215e-05,
"std": 0.040857378393411636,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.0760289877653122,
"max": 0.065830759704113,
"mean": 0.00048469315515831113,
"std": 0.019415758550167084,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.24639879167079926,
"max": 0.23466575145721436,
"mean": -3.0853516364004463e-06,
"std": 0.03943203389644623,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.16285879909992218,
"max": 0.16076169908046722,
"mean": 0.0016295814421027899,
"std": 0.0652732104063034,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.5568758845329285,
"max": 0.9466937184333801,
"mean": 0.7129064202308655,
"std": 0.0403011329472065,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.22882379591464996,
"max": 0.25551655888557434,
"mean": -4.5426822907757014e-05,
"std": 0.0405760332942009,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.1351136714220047,
"max": 0.022313008084893227,
"mean": -0.04135293886065483,
"std": 0.01838735118508339,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.4227588474750519,
"max": 0.3930455446243286,
"mean": -4.085732143721543e-06,
"std": 0.047785546630620956,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.6080650687217712,
"max": 0.6521760821342468,
"mean": 0.0015855736564844847,
"std": 0.05685455724596977,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.2519088387489319,
"max": 0.3208920359611511,
"mean": -6.068687071092427e-06,
"std": 0.01961320824921131,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.3596932888031006,
"max": 0.6842364072799683,
"mean": 0.5706857442855835,
"std": 0.042946916073560715,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.22081606090068817,
"max": 0.1773088276386261,
"mean": -3.454893158050254e-05,
"std": 0.03429890051484108,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.1636391431093216,
"max": 0.23335042595863342,
"mean": 0.00035607549943961203,
"std": 0.032843589782714844,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.26433637738227844,
"max": 0.24021653831005096,
"mean": -5.268204404274002e-05,
"std": 0.033897630870342255,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.859472751617432,
"max": 5.095940113067627,
"mean": 0.043871667236089706,
"std": 1.2294032573699951,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.24689450860023499,
"max": 0.2507416307926178,
"mean": 7.20950702088885e-05,
"std": 0.04398806765675545,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.062653087079525,
"max": 0.05465509742498398,
"mean": 0.0006480686133727431,
"std": 0.01719220168888569,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.287101686000824,
"max": 0.27245277166366577,
"mean": -5.0120852392865345e-05,
"std": 0.04298638179898262,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.16084662079811096,
"max": 0.17058779299259186,
"mean": -0.002887619426473975,
"std": 0.05928964540362358,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.5198022723197937,
"max": 0.9352366328239441,
"mean": 0.7134757041931152,
"std": 0.03851567581295967,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.23833467066287994,
"max": 0.24947485327720642,
"mean": 0.0004647623864002526,
"std": 0.040455412119627,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.1449345052242279,
"max": 0.041161470115184784,
"mean": -0.039693716913461685,
"std": 0.020549351349473,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.5341992378234863,
"max": 0.584149181842804,
"mean": 5.933919965173118e-06,
"std": 0.048861313611269,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5195870399475098,
"max": 0.4941606819629669,
"mean": 0.0023631826043128967,
"std": 0.05346201732754707,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.27384015917778015,
"max": 0.3156191408634186,
"mean": 1.960434929060284e-06,
"std": 0.020050089806318283,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.3661290407180786,
"max": 0.7137707471847534,
"mean": 0.5931426286697388,
"std": 0.045923035591840744,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.21142390370368958,
"max": 0.1996057629585266,
"mean": 3.067640500376001e-05,
"std": 0.034866977483034134,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.18729116022586823,
"max": 0.20393171906471252,
"mean": 0.0009568152017891407,
"std": 0.031525619328022,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.29027533531188965,
"max": 0.34051838517189026,
"mean": -4.7230056225089356e-05,
"std": 0.03458789736032486,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.881865978240967,
"max": 3.3913497924804688,
"mean": 0.014454022981226444,
"std": 0.8585575819015503,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.22494949400424957,
"max": 0.25041675567626953,
"mean": -3.845839273708407e-06,
"std": 0.0422312431037426,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.055274393409490585,
"max": 0.04683299362659454,
"mean": -1.701708242762834e-05,
"std": 0.015851490199565887,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.29334571957588196,
"max": 0.2907007336616516,
"mean": -7.57977295506862e-06,
"std": 0.04194618761539459,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.1247822642326355,
"max": 0.2594626247882843,
"mean": -0.0032404293306171894,
"std": 0.0531664676964283,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.4562881588935852,
"max": 0.8474717736244202,
"mean": 0.7055672407150269,
"std": 0.035394009202718735,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.5121109485626221,
"max": 0.34823864698410034,
"mean": 0.0003428200143389404,
"std": 0.04020027443766594,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.1863405406475067,
"max": 0.039554521441459656,
"mean": -0.03938986361026764,
"std": 0.02135385014116764,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.5456476807594299,
"max": 0.5576444864273071,
"mean": -7.10671374690719e-05,
"std": 0.050736188888549805,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.5122882723808289,
"max": 0.6650155782699585,
"mean": 0.0024437594693154097,
"std": 0.049542441964149475,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.3326261341571808,
"max": 0.26606664061546326,
"mean": 3.3996070669672918e-06,
"std": 0.01938733644783497,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.32209691405296326,
"max": 0.7689979672431946,
"mean": 0.651018500328064,
"std": 0.045278150588274,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.25021034479141235,
"max": 0.22022569179534912,
"mean": -2.263453097839374e-06,
"std": 0.0365014486014843,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.32728204131126404,
"max": 0.28722772002220154,
"mean": -0.0006871280493214726,
"std": 0.038576990365982056,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.3110663890838623,
"max": 0.37101635336875916,
"mean": 6.483237666543573e-05,
"std": 0.03624214604496956,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.721696376800537,
"max": 5.813023090362549,
"mean": 0.037980761379003525,
"std": 1.4134187698364258,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.22233453392982483,
"max": 0.20630262792110443,
"mean": -7.52985361032188e-05,
"std": 0.0424862764775753,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.07775042951107025,
"max": 0.051466166973114014,
"mean": -0.0009254277683794498,
"std": 0.0164100993424654,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.3309888541698456,
"max": 0.3296257257461548,
"mean": -4.630289367923979e-06,
"std": 0.04279271885752678,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.2851186692714691,
"max": 0.11168244481086731,
"mean": -0.0012053586542606354,
"std": 0.04700839892029762,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.4862569272518158,
"max": 0.8893836140632629,
"mean": 0.7374457716941833,
"std": 0.03831757605075836,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.3624440133571625,
"max": 0.27509352564811707,
"mean": 5.130700083100237e-05,
"std": 0.040646348148584366,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.24782374501228333,
"max": 0.04648653045296669,
"mean": -0.0392659492790699,
"std": 0.023277943953871727,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.6279041171073914,
"max": 0.5983599424362183,
"mean": -6.208260310813785e-05,
"std": 0.05311836674809456,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.7105586528778076,
"max": 0.266210675239563,
"mean": 0.0009207880357280374,
"std": 0.05124485120177269,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.3435235619544983,
"max": 0.30372199416160583,
"mean": 2.971426056319615e-07,
"std": 0.019135644659399986,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.34978553652763367,
"max": 0.7852374911308289,
"mean": 0.6388005018234253,
"std": 0.04921075701713562,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.20607401430606842,
"max": 0.20750851929187775,
"mean": -5.96779900661204e-05,
"std": 0.037695422768592834,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.2588743567466736,
"max": 0.2684256136417389,
"mean": -0.00040556711610406637,
"std": 0.04462844133377075,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.3547278344631195,
"max": 0.32300710678100586,
"mean": -6.988519089645706e-06,
"std": 0.03720381483435631,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.265876293182373,
"max": 4.207967281341553,
"mean": -0.026429325342178345,
"std": 1.0068732500076294,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.2394271194934845,
"max": 0.24428503215312958,
"mean": -2.5281191483372822e-05,
"std": 0.04321092739701271,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.06252460181713104,
"max": 0.056893154978752136,
"mean": 0.000347302237059921,
"std": 0.014152363874018192,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.4372415244579315,
"max": 0.3737826347351074,
"mean": 1.467342644900782e-05,
"std": 0.04412253573536873,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.09628994017839432,
"max": 0.17628277838230133,
"mean": -0.0006604281952604651,
"std": 0.03514600917696953,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.4217767119407654,
"max": 1.0722668170928955,
"mean": 0.7484005689620972,
"std": 0.04209807515144348,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.2667092978954315,
"max": 0.2975556254386902,
"mean": -7.937644113553688e-05,
"std": 0.04080634191632271,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.1854698657989502,
"max": 0.04349794238805771,
"mean": -0.03681644797325134,
"std": 0.02560725063085556,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.4579220414161682,
"max": 0.48784998059272766,
"mean": 4.282052395865321e-05,
"std": 0.05421200394630432,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.2866349518299103,
"max": 0.5520289540290833,
"mean": -0.0008793525630608201,
"std": 0.04783879220485687,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.29281285405158997,
"max": 0.32289794087409973,
"mean": 6.245412805583328e-06,
"std": 0.019969133660197258,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.29108351469039917,
"max": 0.7621498107910156,
"mean": 0.6508013010025024,
"std": 0.05207887664437294,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.2440386265516281,
"max": 0.2621654272079468,
"mean": -5.880815479031298e-06,
"std": 0.03961231932044029,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.2678271532058716,
"max": 0.2002498358488083,
"mean": -0.0008784097735770047,
"std": 0.05178229510784149,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.27257686853408813,
"max": 0.2541964650154114,
"mean": 4.526807060756255e-06,
"std": 0.038709431886672974,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -12.982023239135742,
"max": 15.968067169189453,
"mean": 0.03324813023209572,
"std": 1.9908379316329956,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.2077104151248932,
"max": 0.22651426494121552,
"mean": -7.221860869321972e-05,
"std": 0.040554750710725784,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.06946562975645065,
"max": 0.06337178498506546,
"mean": 0.00015520014858338982,
"std": 0.01475033164024353,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.46565988659858704,
"max": 0.3208334743976593,
"mean": 1.9561422959668562e-05,
"std": 0.040589939802885056,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.064049631357193,
"max": 0.11550958454608917,
"mean": 0.0011937393574044108,
"std": 0.02470548450946808,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.3747756779193878,
"max": 0.9347750544548035,
"mean": 0.7509442567825317,
"std": 0.04021797329187393,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.2801269292831421,
"max": 0.27387121319770813,
"mean": -0.00016841593605931848,
"std": 0.040997058153152466,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.19878797233104706,
"max": 0.05111948773264885,
"mean": -0.032027605921030045,
"std": 0.025102604180574417,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.6596145033836365,
"max": 0.537032425403595,
"mean": -4.937778794555925e-05,
"std": 0.05284846946597099,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.1930496245622635,
"max": 0.5826522707939148,
"mean": -0.0005124770104885101,
"std": 0.04108353331685066,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.41787075996398926,
"max": 0.37214192748069763,
"mean": 6.244237738428637e-06,
"std": 0.021621638908982277,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.21441777050495148,
"max": 0.7472008466720581,
"mean": 0.6494799852371216,
"std": 0.05431411787867546,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.20989972352981567,
"max": 0.19592680037021637,
"mean": 4.0151899156626314e-05,
"std": 0.039461154490709305,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.329771488904953,
"max": 0.25982508063316345,
"mean": -0.003228080226108432,
"std": 0.056280527263879776,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.2062487006187439,
"max": 0.2551846504211426,
"mean": 5.400779264164157e-05,
"std": 0.038563843816518784,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.2493767738342285,
"max": 6.938913345336914,
"mean": 0.04840244725346565,
"std": 1.3855851888656616,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.21009960770606995,
"max": 0.23065192997455597,
"mean": -5.2159043661959e-06,
"std": 0.041313353925943375,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.0439465157687664,
"max": 0.03601067140698433,
"mean": -2.0584266167134047e-06,
"std": 0.012799846939742565,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.39804428815841675,
"max": 0.34499886631965637,
"mean": -5.5499749578302726e-05,
"std": 0.04238968715071678,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.055174216628074646,
"max": 0.06293413788080215,
"mean": 0.00036305427784100175,
"std": 0.01867016963660717,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.3503042459487915,
"max": 1.0480320453643799,
"mean": 0.7894532084465027,
"std": 0.048786185681819916,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.3337661623954773,
"max": 0.3864375650882721,
"mean": -0.00016956219042185694,
"std": 0.04148184508085251,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.15768638253211975,
"max": 0.05907022953033447,
"mean": -0.031832221895456314,
"std": 0.0251291636377573,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6973653435707092,
"max": 0.47017383575439453,
"mean": -8.81649466464296e-05,
"std": 0.051795393228530884,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.24848268926143646,
"max": 0.32916560769081116,
"mean": -0.0002544308081269264,
"std": 0.041454534977674484,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.2872900664806366,
"max": 0.3505076766014099,
"mean": -2.3586867428093683e-06,
"std": 0.024236176162958145,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.19670914113521576,
"max": 0.7788708806037903,
"mean": 0.6702359914779663,
"std": 0.05864134803414345,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.2293103188276291,
"max": 0.23172836005687714,
"mean": -2.0263662008801475e-05,
"std": 0.04043755307793617,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.2201755940914154,
"max": 0.2412194311618805,
"mean": 0.0007778588915243745,
"std": 0.05583813413977623,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.21645531058311462,
"max": 0.2269156575202942,
"mean": -7.186527363955975e-05,
"std": 0.03937343880534172,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.91368579864502,
"max": 9.076720237731934,
"mean": -0.0012592850252985954,
"std": 1.8490537405014038,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.2699006199836731,
"max": 0.2594479024410248,
"mean": 4.3596926843747497e-05,
"std": 0.03840681165456772,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.05783012881875038,
"max": 0.057821568101644516,
"mean": 0.0003521823091432452,
"std": 0.014716818928718567,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.26518943905830383,
"max": 0.2887333035469055,
"mean": -6.169862172100693e-05,
"std": 0.03907295688986778,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.04396004229784012,
"max": 0.037220947444438934,
"mean": -9.395174856763333e-05,
"std": 0.013354334980249405,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.33940210938453674,
"max": 1.0958820581436157,
"mean": 0.8637964129447937,
"std": 0.06389264762401581,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.4235352873802185,
"max": 0.41927266120910645,
"mean": 0.000313018070301041,
"std": 0.04350249841809273,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.21509824693202972,
"max": 0.17092689871788025,
"mean": -0.0294746495783329,
"std": 0.03193298354744911,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.6005915999412537,
"max": 0.5609812140464783,
"mean": -0.00015016092220321298,
"std": 0.05344870314002037,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17891772091388702,
"max": 0.3774968683719635,
"mean": 0.0013590974267572165,
"std": 0.03732309862971306,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.39461401104927063,
"max": 0.36924391984939575,
"mean": 3.7040204915683717e-05,
"std": 0.028616365045309067,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.29045382142066956,
"max": 0.8264784812927246,
"mean": 0.7055213451385498,
"std": 0.0678410679101944,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.9263020753860474,
"max": 1.0267603397369385,
"mean": -2.6431953301653266e-05,
"std": 0.04762791842222214,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.8796241879463196,
"max": 0.8164305686950684,
"mean": -0.0003041320014744997,
"std": 0.0956113338470459,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.27020347118377686,
"max": 0.241440087556839,
"mean": -2.271639823447913e-05,
"std": 0.038950297981500626,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.76431655883789,
"max": 22.871889114379883,
"mean": -0.09189724177122116,
"std": 4.073054313659668,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.22821645438671112,
"max": 0.24578580260276794,
"mean": -2.5681954866740853e-05,
"std": 0.03863786533474922,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.06044214218854904,
"max": 0.04586166515946388,
"mean": -0.00014234766422305256,
"std": 0.014693022705614567,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.3386403024196625,
"max": 0.3753957748413086,
"mean": 7.493808880099095e-06,
"std": 0.04081406444311142,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.04647579416632652,
"max": 0.19592434167861938,
"mean": 0.00027245082310400903,
"std": 0.01356989610940218,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.3743247389793396,
"max": 1.133009910583496,
"mean": 0.8900730609893799,
"std": 0.06399820744991302,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.44806551933288574,
"max": 0.5433648824691772,
"mean": 2.4754037440288812e-05,
"std": 0.04556819051504135,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.22422385215759277,
"max": 0.08793910592794418,
"mean": -0.03202162683010101,
"std": 0.03776844963431358,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7274155616760254,
"max": 0.6907259225845337,
"mean": 3.4943295759148896e-05,
"std": 0.05178087204694748,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.17463494837284088,
"max": 0.2185920923948288,
"mean": 3.897436545230448e-05,
"std": 0.031783733516931534,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.34052687883377075,
"max": 0.37423866987228394,
"mean": 4.304847971070558e-05,
"std": 0.034138280898332596,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.3175727128982544,
"max": 1.290410041809082,
"mean": 0.6015003323554993,
"std": 0.08363870531320572,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.28354600071907043,
"max": 0.260841429233551,
"mean": -3.130652658001054e-06,
"std": 0.035979557782411575,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.23592722415924072,
"max": 0.2057497352361679,
"mean": 0.00023727506049908698,
"std": 0.056021153926849365,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.43595167994499207,
"max": 0.32549113035202026,
"mean": 2.434775342408102e-05,
"std": 0.034129101783037186,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.553627967834473,
"max": 7.324089527130127,
"mean": -0.007399275898933411,
"std": 0.7001854181289673,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.34464672207832336,
"max": 0.3639456331729889,
"mean": 0.0001033150329021737,
"std": 0.047829318791627884,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.0738968476653099,
"max": 0.060446880757808685,
"mean": 0.0009350795298814774,
"std": 0.014948361553251743,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.2562582790851593,
"max": 0.28724488615989685,
"mean": 4.657229510485195e-06,
"std": 0.0415559858083725,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.05538095533847809,
"max": 0.06288731843233109,
"mean": 0.00013551797019317746,
"std": 0.007167231757193804,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.4939861297607422,
"max": 1.2202398777008057,
"mean": 1.013412356376648,
"std": 0.1173911765217781,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.0939209461212158,
"max": 1.0473735332489014,
"mean": -4.927456029690802e-05,
"std": 0.05241009593009949,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.22382217645645142,
"max": 0.1730560064315796,
"mean": -0.027248641476035118,
"std": 0.03636055067181587,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8865154385566711,
"max": 0.9247081279754639,
"mean": -0.00014585975441150367,
"std": 0.0532848984003067,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.17122139036655426,
"max": 0.38014623522758484,
"mean": 0.0033699313644319773,
"std": 0.03990361467003822,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.7786033749580383,
"max": 0.7243013381958008,
"mean": 1.8795288269757293e-05,
"std": 0.046159159392118454,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.3385763168334961,
"max": 1.4310884475708008,
"mean": 0.9482859969139099,
"std": 0.20665791630744934,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.745840311050415,
"max": 1.7046537399291992,
"mean": 0.00022703518334310502,
"std": 0.15869012475013733,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.2008079290390015,
"max": 1.1013628244400024,
"mean": -0.009554527699947357,
"std": 0.20401149988174438,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.4215790033340454,
"max": 0.427647203207016,
"mean": 6.439993012463674e-05,
"std": 0.048017047345638275,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.76506996154785,
"max": 19.559972763061523,
"mean": -0.24841785430908203,
"std": 4.7801384925842285,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.32463034987449646,
"max": 0.4392913281917572,
"mean": -1.1934026588278357e-05,
"std": 0.046162351965904236,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.03394031897187233,
"max": 0.03703805059194565,
"mean": 0.0006406006286852062,
"std": 0.012916130013763905,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.7043119668960571,
"max": 0.6668245792388916,
"mean": 4.3251380702713504e-05,
"std": 0.05788382515311241,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.07238046824932098,
"max": 0.06770296394824982,
"mean": -0.00013378039875533432,
"std": 0.012917297892272472,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.38019153475761414,
"max": 1.391236424446106,
"mean": 1.0665456056594849,
"std": 0.21965359151363373,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.6170499324798584,
"max": 0.718601405620575,
"mean": 0.00011217871360713616,
"std": 0.058021701872348785,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.21975933015346527,
"max": 0.22518815100193024,
"mean": 0.006216429639607668,
"std": 0.049728311598300934,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.6300503015518188,
"max": 0.8897712826728821,
"mean": 1.1653193723759614e-05,
"std": 0.023531364277005196,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.5075116753578186,
"max": 0.47451627254486084,
"mean": -0.0030209918040782213,
"std": 0.06935632228851318,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.5379416942596436,
"max": 1.1812505722045898,
"mean": 0.7826943397521973,
"std": 0.0987553521990776,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.26785895228385925,
"max": 0.21342454850673676,
"mean": -0.0002236703730886802,
"std": 0.05399824678897858,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23829060792922974,
"max": 0.014859253540635109,
"mean": -0.043948449194431305,
"std": 0.034328024834394455,
"sparsity": 0.0,
"shape": [
100
]
}
}
}