ck1 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
5d26333 verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.429890900850296,
"max": 0.2975340783596039,
"mean": -0.002528043230995536,
"std": 0.042567234486341476,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.06285920739173889,
"max": 0.10713651776313782,
"mean": 0.0006724470877088606,
"std": 0.03401060774922371,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.4127168655395508,
"max": 0.8372595310211182,
"mean": -0.0001970978337340057,
"std": 0.024115173146128654,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.11470083892345428,
"max": 0.3203592598438263,
"mean": -0.0009399179834872484,
"std": 0.019510779529809952,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.786435842514038,
"max": 2.8647964000701904,
"mean": -0.00036496162647381425,
"std": 0.6155204772949219,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.2788304090499878,
"max": 0.38129961490631104,
"mean": 0.00042573572136461735,
"std": 0.042747072875499725,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.22175073623657227,
"max": 0.208872988820076,
"mean": -0.0044786068610847,
"std": 0.040869712829589844,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.4284111559391022,
"max": 0.47638577222824097,
"mean": 4.7679491217422765e-06,
"std": 0.024512330070137978,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.32299283146858215,
"max": 0.15659146010875702,
"mean": -0.04666333645582199,
"std": 0.051485899835824966,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.41033437848091125,
"max": 0.35466355085372925,
"mean": -0.00013342559395823628,
"std": 0.023606186732649803,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.2283795177936554,
"max": 0.2609671354293823,
"mean": -0.029088540002703667,
"std": 0.04924432560801506,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.25455695390701294,
"max": 0.8167241811752319,
"mean": 0.5252928733825684,
"std": 0.08043710887432098,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.29693663120269775,
"max": 0.26587796211242676,
"mean": -0.00042661806219257414,
"std": 0.03210223466157913,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.09257981181144714,
"max": 0.12483392655849457,
"mean": 0.0006469582440331578,
"std": 0.02571757137775421,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.29060953855514526,
"max": 0.281120628118515,
"mean": -7.341133459703997e-05,
"std": 0.030930932611227036,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.8982954025268555,
"max": 5.813107013702393,
"mean": -0.009337348863482475,
"std": 1.2953522205352783,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.42515280842781067,
"max": 0.3437501788139343,
"mean": 9.81355260591954e-05,
"std": 0.029954733327031136,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.028982222080230713,
"max": 0.027547072619199753,
"mean": -0.0003299822274129838,
"std": 0.012570270337164402,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.4541594088077545,
"max": 0.44774138927459717,
"mean": 2.4147137082763948e-05,
"std": 0.02385564148426056,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.08854468911886215,
"max": 0.09074825048446655,
"mean": 0.0022885985672473907,
"std": 0.019506951794028282,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.2667747437953949,
"max": 1.0526666641235352,
"mean": 0.5310115814208984,
"std": 0.10401110351085663,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.5744121670722961,
"max": 0.6080161333084106,
"mean": -0.00042898603715002537,
"std": 0.038603950291872025,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.1828344166278839,
"max": 0.04558030515909195,
"mean": -0.02944895066320896,
"std": 0.04260854050517082,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.1668061017990112,
"max": 1.6334388256072998,
"mean": 0.0003250878071412444,
"std": 0.02769906260073185,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.1617957502603531,
"max": 0.20511887967586517,
"mean": -0.021121997386217117,
"std": 0.027915872633457184,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.22389063239097595,
"max": 0.8404398560523987,
"mean": 0.48753583431243896,
"std": 0.07487782090902328,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.25540560483932495,
"max": 0.30576375126838684,
"mean": -5.286063242238015e-06,
"std": 0.0334775373339653,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.09518040716648102,
"max": 0.11029241979122162,
"mean": 7.437964086420834e-05,
"std": 0.026927735656499863,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.29654812812805176,
"max": 0.29580071568489075,
"mean": 5.465543654281646e-05,
"std": 0.03255033493041992,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.152629852294922,
"max": 5.073052883148193,
"mean": -0.014528467319905758,
"std": 1.1556384563446045,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.34482821822166443,
"max": 0.3431924283504486,
"mean": 7.847632514312863e-05,
"std": 0.030065450817346573,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.0359608419239521,
"max": 0.03339020535349846,
"mean": -0.00013936487084720284,
"std": 0.013043079525232315,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.31543099880218506,
"max": 0.37475085258483887,
"mean": -1.99221267394023e-05,
"std": 0.024063827469944954,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.1053055077791214,
"max": 0.12205620855093002,
"mean": -0.0019772218074649572,
"std": 0.028851687908172607,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.31148025393486023,
"max": 1.1159186363220215,
"mean": 0.6660937070846558,
"std": 0.09731028974056244,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.8725345730781555,
"max": 0.6275786757469177,
"mean": 0.0016754826065152884,
"std": 0.04743966832756996,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.27123701572418213,
"max": 0.034363195300102234,
"mean": -0.04658954590559006,
"std": 0.040568556636571884,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.9233484268188477,
"max": 0.9644548296928406,
"mean": 0.001022880314849317,
"std": 0.040709808468818665,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.14417493343353271,
"max": 0.07486628741025925,
"mean": -0.00909160915762186,
"std": 0.025672299787402153,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.24042263627052307,
"max": 0.7109521627426147,
"mean": 0.4471237063407898,
"std": 0.05905117839574814,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.2719106674194336,
"max": 0.29774755239486694,
"mean": 9.55516952672042e-06,
"std": 0.035470303148031235,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.11921010911464691,
"max": 0.11835695803165436,
"mean": 0.0007637137896381319,
"std": 0.027623096480965614,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.28068092465400696,
"max": 0.2797088027000427,
"mean": -7.736143015790731e-05,
"std": 0.03509894013404846,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.503926992416382,
"max": 2.515892505645752,
"mean": 0.02668764814734459,
"std": 0.5862060785293579,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.22096332907676697,
"max": 0.2714470624923706,
"mean": 3.3548758437973447e-06,
"std": 0.030734958127141,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.0337090790271759,
"max": 0.03134975582361221,
"mean": 0.00010986338020302355,
"std": 0.012415189296007156,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.2351670116186142,
"max": 0.23143303394317627,
"mean": 5.6707456678850576e-05,
"std": 0.025697972625494003,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.13545046746730804,
"max": 0.12696555256843567,
"mean": -0.00549742579460144,
"std": 0.03995845839381218,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.35431793332099915,
"max": 1.168055772781372,
"mean": 0.7104406356811523,
"std": 0.10342107713222504,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.6171623468399048,
"max": 0.5538070201873779,
"mean": 0.0011603726306930184,
"std": 0.04612257331609726,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.187709778547287,
"max": 0.025375014171004295,
"mean": -0.03482068330049515,
"std": 0.028561368584632874,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.1314054727554321,
"max": 0.9714292287826538,
"mean": 0.0003602738433983177,
"std": 0.0423499159514904,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.5970888137817383,
"max": 0.06280609965324402,
"mean": -0.004877342376857996,
"std": 0.028585655614733696,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.37514442205429077,
"max": 0.9365863800048828,
"mean": 0.5923141837120056,
"std": 0.06635680049657822,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.3909958004951477,
"max": 0.36877286434173584,
"mean": 7.174501661211252e-05,
"std": 0.037190962582826614,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11852732300758362,
"max": 0.13606122136116028,
"mean": 0.0009374335058964789,
"std": 0.02925141341984272,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.6188546419143677,
"max": 0.508575975894928,
"mean": 1.5391087799798697e-05,
"std": 0.03644438832998276,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.168816566467285,
"max": 8.769427299499512,
"mean": -0.10911353677511215,
"std": 1.696131944656372,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.2764376997947693,
"max": 0.2397889643907547,
"mean": 5.34953796886839e-05,
"std": 0.03261784091591835,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.05230281502008438,
"max": 0.03951656445860863,
"mean": 8.823134703561664e-05,
"std": 0.01295400783419609,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.23082277178764343,
"max": 0.23429568111896515,
"mean": -2.1679703422705643e-05,
"std": 0.0293941181153059,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.20415563881397247,
"max": 0.1055976152420044,
"mean": -0.004027670249342918,
"std": 0.03260914608836174,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.3398659825325012,
"max": 1.008574366569519,
"mean": 0.7007372975349426,
"std": 0.09649426490068436,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5645706057548523,
"max": 0.8320877552032471,
"mean": 0.00041511692688800395,
"std": 0.042306262999773026,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.21099911630153656,
"max": 0.03097626566886902,
"mean": -0.032180383801460266,
"std": 0.026477735489606857,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7537994980812073,
"max": 0.7179465293884277,
"mean": -7.129359801183455e-06,
"std": 0.03684566915035248,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.2629236578941345,
"max": 0.10548774898052216,
"mean": -0.00303501239977777,
"std": 0.028845027089118958,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.28467807173728943,
"max": 0.6921964883804321,
"mean": 0.49945610761642456,
"std": 0.04626332223415375,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.279328316450119,
"max": 0.23436570167541504,
"mean": -0.00011136279499623924,
"std": 0.03876578062772751,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.15460819005966187,
"max": 0.12665635347366333,
"mean": -0.002232019789516926,
"std": 0.03342032432556152,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.41363096237182617,
"max": 0.6597210764884949,
"mean": -2.0344648874015547e-05,
"std": 0.03910161554813385,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.231404781341553,
"max": 4.715085029602051,
"mean": -0.020485566928982735,
"std": 1.0069705247879028,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.2449151873588562,
"max": 0.20747897028923035,
"mean": 4.346559217083268e-05,
"std": 0.033968474715948105,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.03452696651220322,
"max": 0.04465686157345772,
"mean": -1.5960962628014386e-05,
"std": 0.012621430680155754,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.20041236281394958,
"max": 0.20551952719688416,
"mean": -2.960992424050346e-05,
"std": 0.031025830656290054,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.19978956878185272,
"max": 0.11348189413547516,
"mean": -0.002926791785284877,
"std": 0.034484151750802994,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.36731821298599243,
"max": 1.0521864891052246,
"mean": 0.6705360412597656,
"std": 0.06614020466804504,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.39791443943977356,
"max": 0.5023131966590881,
"mean": -3.831370850093663e-05,
"std": 0.04114069044589996,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.1279803365468979,
"max": 0.026696184650063515,
"mean": -0.030547261238098145,
"std": 0.021858656778931618,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.44846877455711365,
"max": 0.43229183554649353,
"mean": 8.759970660321414e-05,
"std": 0.034898921847343445,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.2670278549194336,
"max": 0.07220447063446045,
"mean": -0.0011172632221132517,
"std": 0.023101668804883957,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.2872157692909241,
"max": 0.6838868260383606,
"mean": 0.5244971513748169,
"std": 0.047394201159477234,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.22190631926059723,
"max": 0.22351428866386414,
"mean": 1.5601781342411414e-05,
"std": 0.038955170661211014,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.13637839257717133,
"max": 0.10904650390148163,
"mean": 0.0002307215763721615,
"std": 0.02925163321197033,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.37520402669906616,
"max": 0.4367537200450897,
"mean": -9.730283636599779e-06,
"std": 0.03929009288549423,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.8370232582092285,
"max": 4.988061904907227,
"mean": 0.0097434613853693,
"std": 0.8443066477775574,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.22342386841773987,
"max": 0.21985094249248505,
"mean": -9.139148460235447e-08,
"std": 0.034415289759635925,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.04353320971131325,
"max": 0.03576282411813736,
"mean": -0.0002566012553870678,
"std": 0.012079274281859398,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.2132977545261383,
"max": 0.18884801864624023,
"mean": -1.671975405770354e-05,
"std": 0.031542494893074036,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.1805061399936676,
"max": 0.12078476697206497,
"mean": -0.0024164910428225994,
"std": 0.041246652603149414,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.42202678322792053,
"max": 0.9410442113876343,
"mean": 0.6627340912818909,
"std": 0.056649643927812576,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.3713216483592987,
"max": 0.47501668334007263,
"mean": -8.242137118941173e-05,
"std": 0.04089945927262306,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.20792357623577118,
"max": 0.027002831920981407,
"mean": -0.03024197369813919,
"std": 0.02132386527955532,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.33984270691871643,
"max": 0.7327128648757935,
"mean": 8.53092860779725e-05,
"std": 0.03477407246828079,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.23982134461402893,
"max": 0.050322338938713074,
"mean": -0.0011965972371399403,
"std": 0.020453661680221558,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.3062271773815155,
"max": 0.6509252786636353,
"mean": 0.5250095725059509,
"std": 0.04592073708772659,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.30402758717536926,
"max": 0.21729634702205658,
"mean": 7.005365478107706e-05,
"std": 0.03949893265962601,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.14918896555900574,
"max": 0.13127601146697998,
"mean": 0.00036064194864593446,
"std": 0.030438335612416267,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.25730884075164795,
"max": 0.20225763320922852,
"mean": 3.0886923923389986e-05,
"std": 0.03948678448796272,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.334343671798706,
"max": 2.3739240169525146,
"mean": -0.02623903937637806,
"std": 0.4496191143989563,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.1891229748725891,
"max": 0.21049852669239044,
"mean": 3.720186577993445e-05,
"std": 0.03480042889714241,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.03178652375936508,
"max": 0.03553091734647751,
"mean": -0.0002019420498982072,
"std": 0.012286705896258354,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.18846523761749268,
"max": 0.1703805774450302,
"mean": -6.774859502911568e-05,
"std": 0.032177072018384933,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.13940556347370148,
"max": 0.13744769990444183,
"mean": -0.0025155385956168175,
"std": 0.051295846700668335,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.4672105014324188,
"max": 0.9528681039810181,
"mean": 0.6688433885574341,
"std": 0.05244635045528412,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.3241286277770996,
"max": 0.3096275329589844,
"mean": -1.696625076874625e-06,
"std": 0.04095519334077835,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.1246853619813919,
"max": 0.025154586881399155,
"mean": -0.03071470744907856,
"std": 0.019795699045062065,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.43982067704200745,
"max": 0.44470375776290894,
"mean": 9.459229477215558e-05,
"std": 0.03512655198574066,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.22400110960006714,
"max": 0.05141644552350044,
"mean": -0.0011801186483353376,
"std": 0.018454499542713165,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.33955061435699463,
"max": 0.7357662320137024,
"mean": 0.55861496925354,
"std": 0.04118064045906067,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.2722431421279907,
"max": 0.27798357605934143,
"mean": 1.9865790818585083e-05,
"std": 0.04106421023607254,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.1370246559381485,
"max": 0.1397887021303177,
"mean": 0.0004894830053672194,
"std": 0.026618896052241325,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.4905315637588501,
"max": 0.3558432161808014,
"mean": 8.873307524481788e-05,
"std": 0.04070229455828667,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.291904926300049,
"max": 1.7411547899246216,
"mean": -0.02105572447180748,
"std": 0.4997440576553345,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.2170916199684143,
"max": 0.19797761738300323,
"mean": -4.09621607104782e-05,
"std": 0.034239448606967926,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.0413656160235405,
"max": 0.038547735661268234,
"mean": -0.00015065219486132264,
"std": 0.012881237082183361,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.17731794714927673,
"max": 0.18395833671092987,
"mean": 4.7481313231401145e-05,
"std": 0.03156236186623573,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.17941592633724213,
"max": 0.18339262902736664,
"mean": -0.0022199342492967844,
"std": 0.05482170730829239,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.4743531346321106,
"max": 1.0208531618118286,
"mean": 0.6452549695968628,
"std": 0.04991196468472481,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.2717994153499603,
"max": 0.3095380365848541,
"mean": 0.00011231788084842265,
"std": 0.04069165140390396,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.10581093281507492,
"max": 0.02687394618988037,
"mean": -0.029505720362067223,
"std": 0.01791212521493435,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.3386741280555725,
"max": 0.3290008306503296,
"mean": 5.870793393114582e-05,
"std": 0.03442065790295601,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.18140022456645966,
"max": 0.041891518980264664,
"mean": -0.0010755020193755627,
"std": 0.017211386933922768,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.32555529475212097,
"max": 0.6836872696876526,
"mean": 0.5111882090568542,
"std": 0.03670286759734154,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.2333182841539383,
"max": 0.22538095712661743,
"mean": -3.595184534788132e-05,
"std": 0.03918481990695,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.11544553935527802,
"max": 0.13142207264900208,
"mean": 0.00015133176930248737,
"std": 0.029199015349149704,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.3520807921886444,
"max": 0.2848276197910309,
"mean": 7.631589141965378e-06,
"std": 0.03925250843167305,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.123228073120117,
"max": 3.5356757640838623,
"mean": -0.011553899385035038,
"std": 0.6816845536231995,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.2112175077199936,
"max": 0.20856595039367676,
"mean": 3.472584648989141e-05,
"std": 0.03449223190546036,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.03566575422883034,
"max": 0.0481027290225029,
"mean": 0.0007965473923832178,
"std": 0.01284803170710802,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.21010246872901917,
"max": 0.19273991882801056,
"mean": -1.5139250990614528e-06,
"std": 0.031702835112810135,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.1862909346818924,
"max": 0.17676132917404175,
"mean": -0.0028484249487519264,
"std": 0.0586179718375206,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.47472548484802246,
"max": 1.0383955240249634,
"mean": 0.6513745784759521,
"std": 0.049231819808483124,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.24837423861026764,
"max": 0.3289947211742401,
"mean": 0.00018063507741317153,
"std": 0.04057996720075607,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.1235797256231308,
"max": 0.024505803361535072,
"mean": -0.0304916650056839,
"std": 0.01757434755563736,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.4211723804473877,
"max": 0.48196032643318176,
"mean": 1.983910806302447e-06,
"std": 0.03540581464767456,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.1518622189760208,
"max": 0.04325510933995247,
"mean": 3.965849464293569e-05,
"std": 0.014866944402456284,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.31559497117996216,
"max": 0.6791313290596008,
"mean": 0.552861213684082,
"std": 0.040544018149375916,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.20591191947460175,
"max": 0.21929602324962616,
"mean": 3.05178873531986e-05,
"std": 0.03830549493432045,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.13762998580932617,
"max": 0.11262793093919754,
"mean": 2.1001505956519395e-05,
"std": 0.02581183984875679,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.4020220637321472,
"max": 0.3705553412437439,
"mean": 2.6537300072959624e-05,
"std": 0.03818797320127487,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.767557382583618,
"max": 2.8661978244781494,
"mean": 0.00114790303632617,
"std": 0.5165696144104004,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.2021435797214508,
"max": 0.19701559841632843,
"mean": 2.942326318589039e-05,
"std": 0.03430229425430298,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.051028795540332794,
"max": 0.03999846801161766,
"mean": -0.0004189596220385283,
"std": 0.01342750433832407,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.19608062505722046,
"max": 0.20127296447753906,
"mean": -1.228029668709496e-05,
"std": 0.0318099670112133,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.19270533323287964,
"max": 0.1945824921131134,
"mean": -0.0029681914020329714,
"std": 0.06255524605512619,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.34965983033180237,
"max": 1.0794146060943604,
"mean": 0.6671044826507568,
"std": 0.054688673466444016,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22492384910583496,
"max": 0.2511879801750183,
"mean": 0.0003592889988794923,
"std": 0.04076888784766197,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.0908823236823082,
"max": 0.04379650950431824,
"mean": -0.030081426724791527,
"std": 0.01758776418864727,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.35308927297592163,
"max": 0.3038119673728943,
"mean": -4.2369181755930185e-05,
"std": 0.03713066130876541,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.16173776984214783,
"max": 0.06332767009735107,
"mean": -8.476080256514251e-05,
"std": 0.019383691251277924,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.34886276721954346,
"max": 0.7204337120056152,
"mean": 0.5423545241355896,
"std": 0.03890771418809891,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.2189498394727707,
"max": 0.22237031161785126,
"mean": -1.0949186616926454e-05,
"std": 0.03923875838518143,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.11818630248308182,
"max": 0.1705242395401001,
"mean": 0.0002858135849237442,
"std": 0.025103183463215828,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.24609290063381195,
"max": 0.30029821395874023,
"mean": -3.647123230621219e-05,
"std": 0.03893830627202988,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.5019514560699463,
"max": 3.711169481277466,
"mean": 0.015843264758586884,
"std": 0.7819090485572815,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.21829943358898163,
"max": 0.23758333921432495,
"mean": -1.3816705177305266e-05,
"std": 0.03631007671356201,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.04714132845401764,
"max": 0.051366791129112244,
"mean": 0.00047747697681188583,
"std": 0.01350868958979845,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.21323293447494507,
"max": 0.2170214205980301,
"mean": 5.658239751937799e-05,
"std": 0.033622127026319504,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.21135154366493225,
"max": 0.23155677318572998,
"mean": -0.005110344383865595,
"std": 0.06187622249126434,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.36206167936325073,
"max": 1.097632884979248,
"mean": 0.6992448568344116,
"std": 0.05318887159228325,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.23417295515537262,
"max": 0.2448265254497528,
"mean": 0.0004635582445189357,
"std": 0.04127749800682068,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.09782810509204865,
"max": 0.06829667091369629,
"mean": -0.031430259346961975,
"std": 0.018095970153808594,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.30144715309143066,
"max": 0.3511406481266022,
"mean": -8.084578439593315e-05,
"std": 0.04028310999274254,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.15208296477794647,
"max": 0.1494162231683731,
"mean": 0.0002504626754671335,
"std": 0.023021113127470016,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 0.9953764081001282,
"max": 1.0005042552947998,
"mean": 0.9992995858192444,
"std": 0.00161725003272295,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.031269513070583344,
"max": 0.031265489757061005,
"mean": -1.9295868696644902e-05,
"std": 0.018045131117105484,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.031223546713590622,
"max": 0.0309983491897583,
"mean": -0.0010843857889994979,
"std": 0.017954815179109573,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.03126491606235504,
"max": 0.03126438334584236,
"mean": 3.5442317312117666e-06,
"std": 0.018045514822006226,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.031160738319158554,
"max": 0.03118434175848961,
"mean": 0.00033380728564225137,
"std": 0.01806693710386753,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": -0.0004188704479020089,
"max": 0.00032652742811478674,
"mean": -3.7413692552945577e-06,
"std": 9.604167280485854e-05,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 0.9950032234191895,
"max": 1.000982403755188,
"mean": 0.9997574090957642,
"std": 0.0010362789034843445,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.03225700929760933,
"max": 0.032385751605033875,
"mean": -9.290525667893235e-06,
"std": 0.01804504171013832,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.03201417997479439,
"max": 0.03202167525887489,
"mean": 0.0002501691924408078,
"std": 0.018027769401669502,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": -0.0008222123724408448,
"max": 0.0007597835501655936,
"mean": -1.4037771052244352e-06,
"std": 0.0001422762288711965,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": -0.0004344022599980235,
"max": 0.000338842801284045,
"mean": -5.246626642474439e-06,
"std": 8.8350752776023e-05,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.3827516734600067,
"max": 0.7182729244232178,
"mean": 0.5806694030761719,
"std": 0.03871554881334305,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.23742133378982544,
"max": 0.19636878371238708,
"mean": 2.6759680622490123e-05,
"std": 0.037471406161785126,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.1184450015425682,
"max": 0.16545724868774414,
"mean": 0.0009931407403200865,
"std": 0.027538597583770752,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.2451958954334259,
"max": 0.49966853857040405,
"mean": -5.0392896810080856e-05,
"std": 0.0376293808221817,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.932778835296631,
"max": 3.76035213470459,
"mean": -0.003568061627447605,
"std": 0.6805727481842041,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.22708982229232788,
"max": 0.2511258125305176,
"mean": -1.143130793934688e-05,
"std": 0.037441134452819824,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.07165413349866867,
"max": 0.08049532026052475,
"mean": -0.0005234142299741507,
"std": 0.015659447759389877,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.22785918414592743,
"max": 0.25734248757362366,
"mean": -2.8539496270241216e-05,
"std": 0.035427965223789215,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.19991812109947205,
"max": 0.214930921792984,
"mean": -0.005538000259548426,
"std": 0.06830835342407227,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.40544652938842773,
"max": 1.1868609189987183,
"mean": 0.7379507422447205,
"std": 0.05492096021771431,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.22111627459526062,
"max": 0.2460324913263321,
"mean": 0.0005210894159972668,
"std": 0.04134552925825119,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.10342609882354736,
"max": 0.024193264544010162,
"mean": -0.03266071155667305,
"std": 0.018867699429392815,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.448818176984787,
"max": 0.4217819571495056,
"mean": -0.000431257882155478,
"std": 0.04690708965063095,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.2508312165737152,
"max": 0.46896737813949585,
"mean": 0.00319076469168067,
"std": 0.04450752213597298,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.3169752359390259,
"max": 0.33314692974090576,
"mean": -2.5337005354231223e-05,
"std": 0.021293330937623978,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.32465165853500366,
"max": 0.6822460889816284,
"mean": 0.5709546208381653,
"std": 0.04454142227768898,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.16416817903518677,
"max": 0.1733636111021042,
"mean": -4.858425018028356e-05,
"std": 0.03318599984049797,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.18635453283786774,
"max": 0.1423773616552353,
"mean": 4.034899757243693e-05,
"std": 0.02966292016208172,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.37941935658454895,
"max": 0.24537599086761475,
"mean": -1.0037202628154773e-05,
"std": 0.03276722505688667,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.6522655487060547,
"max": 3.2869510650634766,
"mean": -0.014257419854402542,
"std": 0.9848745465278625,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.23496949672698975,
"max": 0.24738511443138123,
"mean": -1.7606289475224912e-05,
"std": 0.04170484468340874,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.07273464649915695,
"max": 0.15422259271144867,
"mean": 0.0006638166960328817,
"std": 0.025166962295770645,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.2664797306060791,
"max": 0.248508021235466,
"mean": -1.5497178537771106e-05,
"std": 0.04014508053660393,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.18958289921283722,
"max": 0.19478872418403625,
"mean": -0.0012272386811673641,
"std": 0.06668190658092499,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.32911282777786255,
"max": 0.9983987808227539,
"mean": 0.7191941142082214,
"std": 0.0522039495408535,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.23135632276535034,
"max": 0.24583274126052856,
"mean": 0.00018275347247254103,
"std": 0.04090878367424011,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11377062648534775,
"max": 0.018522411584854126,
"mean": -0.04246858134865761,
"std": 0.018818210810422897,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.3897111713886261,
"max": 0.40687721967697144,
"mean": -2.178383874706924e-05,
"std": 0.04854356870055199,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.6922244429588318,
"max": 0.4119531214237213,
"mean": 0.0008513483917340636,
"std": 0.060246195644140244,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": -0.0007574164774268866,
"max": 1.0006382465362549,
"mean": 0.0004883571527898312,
"std": 0.022093627601861954,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 0.995638906955719,
"max": 1.000357985496521,
"mean": 0.9993537068367004,
"std": 0.001561639248393476,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.03126733377575874,
"max": 0.031276635825634,
"mean": -2.102728103636764e-05,
"std": 0.01803644187748432,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.03121519461274147,
"max": 0.031229794025421143,
"mean": -0.000677098985761404,
"std": 0.017830997705459595,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.03127024322748184,
"max": 0.03126488998532295,
"mean": -8.836910637910478e-06,
"std": 0.018035493791103363,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.031232407316565514,
"max": 0.031246833503246307,
"mean": -0.0007298535201698542,
"std": 0.0179455429315567,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": -0.00021961150923743844,
"max": 0.00025036477018147707,
"mean": -8.001849209904321e-07,
"std": 8.148775668814778e-05,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 0.995234489440918,
"max": 1.0012273788452148,
"mean": 0.9999035596847534,
"std": 0.001056881621479988,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.03210779279470444,
"max": 0.03193911164999008,
"mean": 5.988833436276764e-06,
"std": 0.018047882243990898,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.031279150396585464,
"max": 0.031749434769153595,
"mean": 0.00044275011168792844,
"std": 0.018095213919878006,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": -0.0007249970221891999,
"max": 0.0007807987276464701,
"mean": -3.5197314218748943e-07,
"std": 0.00014107293100096285,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": -0.00022946292301639915,
"max": 0.00021843933791387826,
"mean": -1.2389690482450533e-06,
"std": 7.586943684145808e-05,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.23457658290863037,
"max": 0.2724316418170929,
"mean": 7.120183454389917e-06,
"std": 0.01881435327231884,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.32128995656967163,
"max": 0.692602813243866,
"mean": 0.5816522836685181,
"std": 0.04586285352706909,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.18137724697589874,
"max": 0.19706015288829803,
"mean": -1.1772945072152652e-05,
"std": 0.03318871185183525,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.1606057584285736,
"max": 0.12942680716514587,
"mean": -0.0010653780773282051,
"std": 0.03413666784763336,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.3314096927642822,
"max": 0.3108590841293335,
"mean": -1.029382929118583e-05,
"std": 0.03223954886198044,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.800930500030518,
"max": 8.760626792907715,
"mean": 0.09345310181379318,
"std": 1.6193360090255737,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.23322908580303192,
"max": 0.24158968031406403,
"mean": 4.1257830162066966e-05,
"std": 0.040864504873752594,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.07589098066091537,
"max": 0.06572694331407547,
"mean": 0.00047726332559250295,
"std": 0.019406452775001526,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.24502328038215637,
"max": 0.23352351784706116,
"mean": -2.668632077984512e-06,
"std": 0.039439182728528976,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.16295023262500763,
"max": 0.16059955954551697,
"mean": 0.0016356806736439466,
"std": 0.06525918841362,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.556554913520813,
"max": 0.9408271312713623,
"mean": 0.7128406167030334,
"std": 0.039769869297742844,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.22860386967658997,
"max": 0.25511136651039124,
"mean": -4.539915607892908e-05,
"std": 0.04058451950550079,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.13515348732471466,
"max": 0.02234305441379547,
"mean": -0.04134881868958473,
"std": 0.01836741715669632,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.4212746024131775,
"max": 0.39222264289855957,
"mean": -4.234017978888005e-06,
"std": 0.047794174402952194,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.6065890789031982,
"max": 0.6503084897994995,
"mean": 0.0015799436951056123,
"std": 0.056790802627801895,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.25144556164741516,
"max": 0.3204054832458496,
"mean": -5.961472197668627e-06,
"std": 0.019617972895503044,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.36011484265327454,
"max": 0.6801881790161133,
"mean": 0.5707067251205444,
"std": 0.04279083386063576,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.22022095322608948,
"max": 0.17668727040290833,
"mean": -3.4830391086870804e-05,
"std": 0.034304577857255936,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.16363094747066498,
"max": 0.2328542321920395,
"mean": 0.0003622955409809947,
"std": 0.03286634013056755,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.26301464438438416,
"max": 0.23922747373580933,
"mean": -5.2115137805230916e-05,
"std": 0.03390384837985039,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.843376159667969,
"max": 5.079013824462891,
"mean": 0.043839357793331146,
"std": 1.2277964353561401,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.24616090953350067,
"max": 0.24996501207351685,
"mean": 7.23035482224077e-05,
"std": 0.04399650916457176,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.06268942356109619,
"max": 0.054509397596120834,
"mean": 0.0006487497594207525,
"std": 0.017188087105751038,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.2859387695789337,
"max": 0.27142879366874695,
"mean": -4.999006341677159e-05,
"std": 0.04299502447247505,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.16028116643428802,
"max": 0.1701924204826355,
"mean": -0.00288166431710124,
"std": 0.05925562232732773,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.5195892453193665,
"max": 0.9285021424293518,
"mean": 0.71345454454422,
"std": 0.03798013553023338,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.23824341595172882,
"max": 0.24957609176635742,
"mean": 0.0004649516486097127,
"std": 0.040465425699949265,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.1440071016550064,
"max": 0.041583579033613205,
"mean": -0.03968297317624092,
"std": 0.020529083907604218,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.5325517058372498,
"max": 0.5824555158615112,
"mean": 5.4546726460102946e-06,
"std": 0.04887215048074722,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5182770490646362,
"max": 0.4927639365196228,
"mean": 0.002359384670853615,
"std": 0.05340024083852768,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.27337488532066345,
"max": 0.3148258626461029,
"mean": 1.8105949948221678e-06,
"std": 0.020055659115314484,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.36668556928634644,
"max": 0.7091761827468872,
"mean": 0.5931493639945984,
"std": 0.04574775695800781,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.2106715887784958,
"max": 0.1992705911397934,
"mean": 3.0829094612272456e-05,
"std": 0.03486945852637291,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.18688145279884338,
"max": 0.2038576900959015,
"mean": 0.0009574516443535686,
"std": 0.03150374814867973,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.2888670563697815,
"max": 0.33895108103752136,
"mean": -4.766129131894559e-05,
"std": 0.03459092602133751,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.8705790042877197,
"max": 3.3815643787384033,
"mean": 0.014464044943451881,
"std": 0.8578398823738098,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.2241480052471161,
"max": 0.24975183606147766,
"mean": -4.014226306026103e-06,
"std": 0.04223877936601639,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.0549103245139122,
"max": 0.04695763811469078,
"mean": -1.4065793948248029e-05,
"std": 0.015847966074943542,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.2923896610736847,
"max": 0.2908935844898224,
"mean": -7.1035901783034205e-06,
"std": 0.04195380210876465,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.12478198111057281,
"max": 0.2591152787208557,
"mean": -0.003229282796382904,
"std": 0.053138162940740585,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.45623326301574707,
"max": 0.8426384925842285,
"mean": 0.7055743336677551,
"std": 0.034994304180145264,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.5110324621200562,
"max": 0.3488520383834839,
"mean": 0.00034251363831572235,
"std": 0.04021010175347328,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.18705947697162628,
"max": 0.03953401744365692,
"mean": -0.03937750309705734,
"std": 0.02131262607872486,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.5440298318862915,
"max": 0.5563207864761353,
"mean": -7.213428762042895e-05,
"std": 0.050746381282806396,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.5106754302978516,
"max": 0.662798285484314,
"mean": 0.002447732724249363,
"std": 0.04947002977132797,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.33220773935317993,
"max": 0.2652227580547333,
"mean": 3.882123110088287e-06,
"std": 0.01939382590353489,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.32238951325416565,
"max": 0.764789879322052,
"mean": 0.6509858965873718,
"std": 0.0451430045068264,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.24893951416015625,
"max": 0.219136044383049,
"mean": -2.739794126682682e-06,
"std": 0.036503732204437256,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.32658451795578003,
"max": 0.28703945875167847,
"mean": -0.0006784016732126474,
"std": 0.038509681820869446,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.3096793591976166,
"max": 0.3693031072616577,
"mean": 6.47535634925589e-05,
"std": 0.036244187504053116,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.706123352050781,
"max": 5.793623447418213,
"mean": 0.03790595382452011,
"std": 1.4113690853118896,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.22146277129650116,
"max": 0.20545163750648499,
"mean": -7.498646300518885e-05,
"std": 0.042494479566812515,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.07756227254867554,
"max": 0.05129515379667282,
"mean": -0.0009279022924602032,
"std": 0.016406826674938202,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.33102676272392273,
"max": 0.3289909064769745,
"mean": -5.028288796893321e-06,
"std": 0.042801517993211746,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.28435027599334717,
"max": 0.111260324716568,
"mean": -0.001205979730002582,
"std": 0.04699746519327164,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.4868572950363159,
"max": 0.8827712535858154,
"mean": 0.7374467849731445,
"std": 0.03787440061569214,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.3608104884624481,
"max": 0.2736315429210663,
"mean": 5.1337454351596534e-05,
"std": 0.04065750911831856,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.24695155024528503,
"max": 0.04662873595952988,
"mean": -0.039258524775505066,
"std": 0.023203320801258087,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.6257067322731018,
"max": 0.5967472195625305,
"mean": -6.336745718726888e-05,
"std": 0.05312981456518173,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.7091463208198547,
"max": 0.26562684774398804,
"mean": 0.0009212760487571359,
"std": 0.051211755722761154,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.34325337409973145,
"max": 0.30324116349220276,
"mean": 1.430171323590912e-07,
"std": 0.019143851473927498,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.34994906187057495,
"max": 0.7801994681358337,
"mean": 0.6388012170791626,
"std": 0.04902452602982521,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.20566730201244354,
"max": 0.2065981775522232,
"mean": -6.0025900893379e-05,
"std": 0.03770073875784874,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.25845062732696533,
"max": 0.268261194229126,
"mean": -0.00040606403490528464,
"std": 0.04461587592959404,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.3532998263835907,
"max": 0.3217300474643707,
"mean": -7.498586455767509e-06,
"std": 0.037208717316389084,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.249058246612549,
"max": 4.194725036621094,
"mean": -0.02638459950685501,
"std": 1.005539894104004,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.2386980652809143,
"max": 0.24372872710227966,
"mean": -2.586210030131042e-05,
"std": 0.04321879521012306,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.062367696315050125,
"max": 0.05657341331243515,
"mean": 0.0003560591721907258,
"std": 0.01414806954562664,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.43753641843795776,
"max": 0.37365373969078064,
"mean": 1.460490602767095e-05,
"std": 0.044131483882665634,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.09578664600849152,
"max": 0.17602641880512238,
"mean": -0.0006584142101928592,
"std": 0.0351262167096138,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.42189696431159973,
"max": 1.0643466711044312,
"mean": 0.7485300302505493,
"std": 0.04179271310567856,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.265593945980072,
"max": 0.29676973819732666,
"mean": -7.866104715503752e-05,
"std": 0.04081883281469345,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.18380795419216156,
"max": 0.04289933666586876,
"mean": -0.036790553480386734,
"std": 0.02553965151309967,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.4579704999923706,
"max": 0.4863548278808594,
"mean": 4.272036676411517e-05,
"std": 0.05422580987215042,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.2855266034603119,
"max": 0.5506117939949036,
"mean": -0.0008784987148828804,
"std": 0.047787394374608994,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.2924049496650696,
"max": 0.32256847620010376,
"mean": 5.68283303437056e-06,
"std": 0.01997658796608448,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.29146960377693176,
"max": 0.7568098902702332,
"mean": 0.6507450938224792,
"std": 0.05195383355021477,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.2434154599905014,
"max": 0.26121068000793457,
"mean": -5.642844371323008e-06,
"std": 0.039615679532289505,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.2669755518436432,
"max": 0.19996695220470428,
"mean": -0.0008783398079685867,
"std": 0.051739659160375595,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.27164191007614136,
"max": 0.25313133001327515,
"mean": 5.889336534892209e-06,
"std": 0.03871198371052742,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -12.952698707580566,
"max": 15.9312744140625,
"mean": 0.03322799503803253,
"std": 1.9877989292144775,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.20647653937339783,
"max": 0.2256641685962677,
"mean": -7.246333552757278e-05,
"std": 0.040561433881521225,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.06935624778270721,
"max": 0.06306472420692444,
"mean": 0.00016317634435836226,
"std": 0.014748629182577133,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.4654642939567566,
"max": 0.31973931193351746,
"mean": 1.960094778041821e-05,
"std": 0.04059756174683571,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.06414826959371567,
"max": 0.11558651179075241,
"mean": 0.0012002706062048674,
"std": 0.024707410484552383,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.3750652074813843,
"max": 0.9275709390640259,
"mean": 0.7511184215545654,
"std": 0.03999503329396248,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.2787969410419464,
"max": 0.2728310525417328,
"mean": -0.00016816731658764184,
"std": 0.0410102978348732,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.19773395359516144,
"max": 0.05162842571735382,
"mean": -0.03201429173350334,
"std": 0.025033777579665184,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.6583139300346375,
"max": 0.5351659655570984,
"mean": -5.119909474160522e-05,
"std": 0.05286192148923874,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.1919519156217575,
"max": 0.5808603763580322,
"mean": -0.0005111135542392731,
"std": 0.04104519635438919,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.4174348711967468,
"max": 0.3718706965446472,
"mean": 6.703614417347126e-06,
"std": 0.021633952856063843,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.21479681134223938,
"max": 0.7478918433189392,
"mean": 0.6493618488311768,
"std": 0.054201409220695496,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.20870910584926605,
"max": 0.1947445124387741,
"mean": 4.020327469334006e-05,
"std": 0.03945876285433769,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.32888734340667725,
"max": 0.25908946990966797,
"mean": -0.003229741007089615,
"std": 0.05623537674546242,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.2056186944246292,
"max": 0.2540878653526306,
"mean": 5.3863834182266146e-05,
"std": 0.03856115788221359,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.228662967681885,
"max": 6.915782928466797,
"mean": 0.04823269695043564,
"std": 1.3832472562789917,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.20932167768478394,
"max": 0.22993139922618866,
"mean": -4.4988796616962645e-06,
"std": 0.04132062569260597,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.04368359223008156,
"max": 0.035936541855335236,
"mean": -1.0926916729658842e-05,
"std": 0.012798542156815529,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.3968988060951233,
"max": 0.34478238224983215,
"mean": -5.5305037676589563e-05,
"std": 0.04239818826317787,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.05508316308259964,
"max": 0.06261169910430908,
"mean": 0.0003532343253027648,
"std": 0.018669025972485542,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.3511422276496887,
"max": 1.0404622554779053,
"mean": 0.7897100448608398,
"std": 0.048514608293771744,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.3338225483894348,
"max": 0.38620951771736145,
"mean": -0.00016899823094718158,
"std": 0.04149709641933441,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.15740133821964264,
"max": 0.058948904275894165,
"mean": -0.0318116769194603,
"std": 0.025069545954465866,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6960089206695557,
"max": 0.46894899010658264,
"mean": -8.237230940721929e-05,
"std": 0.05181308463215828,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.24741840362548828,
"max": 0.3286932408809662,
"mean": -0.00026996995438821614,
"std": 0.04144337400794029,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.28653645515441895,
"max": 0.35008078813552856,
"mean": -2.9175917006796226e-06,
"std": 0.024247299879789352,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.19693201780319214,
"max": 0.7785046696662903,
"mean": 0.670115053653717,
"std": 0.058539655059576035,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.228579580783844,
"max": 0.23089821636676788,
"mean": -2.1206951714702882e-05,
"std": 0.040444690734148026,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.22008375823497772,
"max": 0.24102427065372467,
"mean": 0.0007767346687614918,
"std": 0.055866289883852005,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.21646404266357422,
"max": 0.2256259322166443,
"mean": -7.261607970576733e-05,
"std": 0.03937656059861183,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.884381294250488,
"max": 9.046843528747559,
"mean": -0.0012065814808011055,
"std": 1.8454406261444092,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.2685357332229614,
"max": 0.2581280469894409,
"mean": 4.3568383262027055e-05,
"std": 0.03841337561607361,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.057995330542325974,
"max": 0.05802358686923981,
"mean": 0.00035532776382751763,
"std": 0.014707793481647968,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.2625483274459839,
"max": 0.2874881625175476,
"mean": -6.166227103676647e-05,
"std": 0.039080966264009476,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.0441780760884285,
"max": 0.03726305067539215,
"mean": -0.00010403832129668444,
"std": 0.013333701528608799,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.3392186760902405,
"max": 1.088745355606079,
"mean": 0.8640130758285522,
"std": 0.06376548111438751,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.42300641536712646,
"max": 0.41883379220962524,
"mean": 0.00031391510856337845,
"std": 0.04352227598428726,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.21468287706375122,
"max": 0.1707322746515274,
"mean": -0.02942698448896408,
"std": 0.03183940798044205,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.5976030826568604,
"max": 0.559415340423584,
"mean": -0.00014561890566255897,
"std": 0.05347010865807533,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17889779806137085,
"max": 0.3772771656513214,
"mean": 0.001343069365248084,
"std": 0.03730209544301033,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.39426180720329285,
"max": 0.36868590116500854,
"mean": 3.8257519918261096e-05,
"std": 0.0286222156137228,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.2908227741718292,
"max": 0.8264791369438171,
"mean": 0.7054398655891418,
"std": 0.0677274614572525,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.926691472530365,
"max": 1.0270028114318848,
"mean": -2.8848577130702324e-05,
"std": 0.04765753820538521,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.878186821937561,
"max": 0.8147233724594116,
"mean": -0.0002844139817170799,
"std": 0.09543365985155106,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.27030670642852783,
"max": 0.24055372178554535,
"mean": -2.2271982743404806e-05,
"std": 0.038951653987169266,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.68506622314453,
"max": 22.795772552490234,
"mean": -0.09177836775779724,
"std": 4.062017440795898,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.22721800208091736,
"max": 0.24524104595184326,
"mean": -2.5419916710234247e-05,
"std": 0.038644734770059586,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.059977784752845764,
"max": 0.04509967938065529,
"mean": -0.00013076608593109995,
"std": 0.01468411460518837,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.3371436893939972,
"max": 0.3742288053035736,
"mean": 7.546843335148878e-06,
"std": 0.04082665964961052,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.04609467089176178,
"max": 0.19514600932598114,
"mean": 0.00027449309709481895,
"std": 0.013541752472519875,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.37357744574546814,
"max": 1.125421166419983,
"mean": 0.8902103900909424,
"std": 0.06386467814445496,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.447258859872818,
"max": 0.5423630475997925,
"mean": 2.548232805565931e-05,
"std": 0.045591775327920914,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.22343683242797852,
"max": 0.08690512925386429,
"mean": -0.03200257197022438,
"std": 0.03771420195698738,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7260164022445679,
"max": 0.6879873275756836,
"mean": 3.631926665548235e-05,
"std": 0.05180613696575165,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.17385190725326538,
"max": 0.21751302480697632,
"mean": 3.567736712284386e-05,
"std": 0.03174319490790367,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.3385016918182373,
"max": 0.37161216139793396,
"mean": 4.3165768147446215e-05,
"std": 0.0341353677213192,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.31760096549987793,
"max": 1.2830872535705566,
"mean": 0.6014329195022583,
"std": 0.08317635953426361,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.28283271193504333,
"max": 0.26012101769447327,
"mean": -2.921331542893313e-06,
"std": 0.035985857248306274,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.23526135087013245,
"max": 0.20543411374092102,
"mean": 0.00024757458595559,
"std": 0.05601666867733002,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.4347652792930603,
"max": 0.32389530539512634,
"mean": 2.395988121861592e-05,
"std": 0.03412287309765816,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.541207790374756,
"max": 7.30653190612793,
"mean": -0.00736255943775177,
"std": 0.6987443566322327,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.3433501720428467,
"max": 0.361217200756073,
"mean": 0.0001032147411024198,
"std": 0.04784071072936058,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.07378581166267395,
"max": 0.060352873057127,
"mean": 0.0009383288561366498,
"std": 0.01492984127253294,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.2561882436275482,
"max": 0.28616371750831604,
"mean": 5.244153726380318e-06,
"std": 0.04157177358865738,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.05515698716044426,
"max": 0.062612384557724,
"mean": 0.00012199293996673077,
"std": 0.007132581900805235,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.49441853165626526,
"max": 1.2188090085983276,
"mean": 1.013464331626892,
"std": 0.11732637882232666,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.0939745903015137,
"max": 1.0476189851760864,
"mean": -4.830169564229436e-05,
"std": 0.05242462456226349,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.22291964292526245,
"max": 0.17299318313598633,
"mean": -0.027209078893065453,
"std": 0.03627277910709381,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8831630349159241,
"max": 0.9219300150871277,
"mean": -0.00014596671098843217,
"std": 0.05330995097756386,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.17071670293807983,
"max": 0.3785896301269531,
"mean": 0.0033629729878157377,
"std": 0.03981942683458328,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.7773581147193909,
"max": 0.721552848815918,
"mean": 1.7906297216541134e-05,
"std": 0.0461493544280529,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.33866649866104126,
"max": 1.4223623275756836,
"mean": 0.9482957124710083,
"std": 0.20650897920131683,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.7458724975585938,
"max": 1.7043527364730835,
"mean": 0.0002272979763802141,
"std": 0.1587107926607132,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.1964622735977173,
"max": 1.0986626148223877,
"mean": -0.009530629962682724,
"std": 0.20347940921783447,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.420305460691452,
"max": 0.42840367555618286,
"mean": 6.361818668665364e-05,
"std": 0.04802125319838524,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.700023651123047,
"max": 19.49565315246582,
"mean": -0.24793246388435364,
"std": 4.7666015625,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.3232991695404053,
"max": 0.4378996789455414,
"mean": -1.1727358469215687e-05,
"std": 0.04616958647966385,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.033631421625614166,
"max": 0.03664267063140869,
"mean": 0.0006392866489477456,
"std": 0.012905232608318329,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.7025362849235535,
"max": 0.6701837778091431,
"mean": 4.212657222524285e-05,
"std": 0.057898350059986115,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.07234562933444977,
"max": 0.06781232357025146,
"mean": -0.00013423134805634618,
"std": 0.012877929955720901,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.3804936408996582,
"max": 1.3917937278747559,
"mean": 1.0666232109069824,
"std": 0.21957866847515106,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.6164886951446533,
"max": 0.7186930179595947,
"mean": 0.00011397639173083007,
"std": 0.05803186818957329,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.21819192171096802,
"max": 0.22446297109127045,
"mean": 0.006146667059510946,
"std": 0.04965293034911156,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.6298643350601196,
"max": 0.8897628784179688,
"mean": 1.269071981369052e-05,
"std": 0.023556767031550407,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.50624680519104,
"max": 0.4730708599090576,
"mean": -0.0030176215805113316,
"std": 0.06914978474378586,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.5384271144866943,
"max": 1.1763767004013062,
"mean": 0.7825473546981812,
"std": 0.09825034439563751,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.26688942313194275,
"max": 0.21287617087364197,
"mean": -0.00022272299975156784,
"std": 0.0540103055536747,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23796546459197998,
"max": 0.014876163564622402,
"mean": -0.04389083757996559,
"std": 0.03420323133468628,
"sparsity": 0.0,
"shape": [
100
]
}
}
}