guu1 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
ace9001 verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.43091416358947754,
"max": 0.2991102933883667,
"mean": -0.002557656727731228,
"std": 0.04255230724811554,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.06317874044179916,
"max": 0.10845368355512619,
"mean": 0.0006046494818292558,
"std": 0.0341438427567482,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.4125913977622986,
"max": 0.8363389372825623,
"mean": -0.0002094925002893433,
"std": 0.024107541888952255,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.11634448170661926,
"max": 0.32392504811286926,
"mean": -0.0009387563331983984,
"std": 0.019654380157589912,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.8076894283294678,
"max": 2.8856873512268066,
"mean": -0.0003593244473449886,
"std": 0.6153794527053833,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.2804395258426666,
"max": 0.38235825300216675,
"mean": 0.00042111962102353573,
"std": 0.0427500456571579,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.22397927939891815,
"max": 0.21124881505966187,
"mean": -0.004504885524511337,
"std": 0.04102449491620064,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.42797791957855225,
"max": 0.4753724932670593,
"mean": 3.1681217933510197e-06,
"std": 0.024508841335773468,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.3278864026069641,
"max": 0.15815186500549316,
"mean": -0.046754755079746246,
"std": 0.05172203853726387,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.4108750522136688,
"max": 0.3548462688922882,
"mean": -0.0001276329276151955,
"std": 0.023600950837135315,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.231490820646286,
"max": 0.26459917426109314,
"mean": -0.029202936217188835,
"std": 0.049504559487104416,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.2546604871749878,
"max": 0.8254969120025635,
"mean": 0.5257646441459656,
"std": 0.08148879557847977,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.2975306808948517,
"max": 0.26634442806243896,
"mean": -0.0004239020636305213,
"std": 0.032103944569826126,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.093165822327137,
"max": 0.12537634372711182,
"mean": 0.0006500760791823268,
"std": 0.0257789958268404,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.2912229299545288,
"max": 0.2824551463127136,
"mean": -7.682169962208718e-05,
"std": 0.03093571960926056,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.9252495765686035,
"max": 5.839654445648193,
"mean": -0.00940663367509842,
"std": 1.2986583709716797,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.4255436658859253,
"max": 0.34462970495224,
"mean": 9.765196591615677e-05,
"std": 0.02995290234684944,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.028961628675460815,
"max": 0.027653951197862625,
"mean": -0.000311878917273134,
"std": 0.012572262436151505,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.4547809660434723,
"max": 0.44922640919685364,
"mean": 2.2741787688573822e-05,
"std": 0.023854725062847137,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.08907536417245865,
"max": 0.09154797345399857,
"mean": 0.0022746319882571697,
"std": 0.019537169486284256,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.2665960192680359,
"max": 1.0631530284881592,
"mean": 0.5315366387367249,
"std": 0.10529287159442902,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.5752094984054565,
"max": 0.6091693043708801,
"mean": -0.0004337065329309553,
"std": 0.038595084100961685,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.18266847729682922,
"max": 0.04574590548872948,
"mean": -0.02949558012187481,
"std": 0.042705073952674866,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.168283462524414,
"max": 1.6358791589736938,
"mean": 0.0003184601664543152,
"std": 0.027693841606378555,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.1632407307624817,
"max": 0.20662632584571838,
"mean": -0.02112644352018833,
"std": 0.027983704581856728,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.2244432270526886,
"max": 0.8492330312728882,
"mean": 0.4877929091453552,
"std": 0.07575991004705429,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.25644662976264954,
"max": 0.30648505687713623,
"mean": -9.105999197345227e-06,
"std": 0.03347046673297882,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.09590143710374832,
"max": 0.11091545224189758,
"mean": 5.9943689848296344e-05,
"std": 0.02701094001531601,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.29843541979789734,
"max": 0.29746681451797485,
"mean": 5.037898154114373e-05,
"std": 0.0325385183095932,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.186855792999268,
"max": 5.106731414794922,
"mean": -0.014725911431014538,
"std": 1.1609561443328857,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.34537965059280396,
"max": 0.3438728153705597,
"mean": 7.886411185609177e-05,
"std": 0.030058259144425392,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.036315590143203735,
"max": 0.033395010977983475,
"mean": -0.00014420351362787187,
"std": 0.013025550171732903,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.3161202371120453,
"max": 0.37616145610809326,
"mean": -2.1655154341715388e-05,
"std": 0.02405548468232155,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.10574664920568466,
"max": 0.12242550402879715,
"mean": -0.0019548372365534306,
"std": 0.028876660391688347,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.31179988384246826,
"max": 1.1284958124160767,
"mean": 0.6666731238365173,
"std": 0.09859278053045273,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.8728909492492676,
"max": 0.6278397440910339,
"mean": 0.0016749973874539137,
"std": 0.047438185662031174,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.2722160518169403,
"max": 0.0340891033411026,
"mean": -0.046644046902656555,
"std": 0.04069075360894203,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.922055184841156,
"max": 0.9654105305671692,
"mean": 0.0010205680737271905,
"std": 0.04070195555686951,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.14518415927886963,
"max": 0.07515987008810043,
"mean": -0.009094657376408577,
"std": 0.025729060173034668,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.2397412657737732,
"max": 0.7171911001205444,
"mean": 0.447447270154953,
"std": 0.05987730622291565,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.2741525173187256,
"max": 0.29877936840057373,
"mean": 8.61497210280504e-06,
"std": 0.03547372668981552,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.11957156658172607,
"max": 0.11899449676275253,
"mean": 0.0007509939605370164,
"std": 0.0276488047093153,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.2823837697505951,
"max": 0.28084659576416016,
"mean": -7.657262904103845e-05,
"std": 0.035102009773254395,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.5205748081207275,
"max": 2.532623291015625,
"mean": 0.02687813714146614,
"std": 0.5879213809967041,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.2220122367143631,
"max": 0.27260157465934753,
"mean": 2.5499884941382334e-06,
"std": 0.030731454491615295,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.03331878036260605,
"max": 0.031287048012018204,
"mean": 0.00011721440387191251,
"std": 0.01239620428532362,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.2359972894191742,
"max": 0.23261798918247223,
"mean": 5.7136268878821284e-05,
"std": 0.025697365403175354,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.13661594688892365,
"max": 0.12854568660259247,
"mean": -0.005501019302755594,
"std": 0.03999658301472664,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.3546392619609833,
"max": 1.180222511291504,
"mean": 0.7107274532318115,
"std": 0.10418680310249329,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.6183957457542419,
"max": 0.5562719106674194,
"mean": 0.001160319778136909,
"std": 0.04611416533589363,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.19019058346748352,
"max": 0.024931631982326508,
"mean": -0.034878939390182495,
"std": 0.028703488409519196,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.1339737176895142,
"max": 0.9729978442192078,
"mean": 0.00035909697180613875,
"std": 0.04234269633889198,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.6004759073257446,
"max": 0.06302264332771301,
"mean": -0.004885237663984299,
"std": 0.028683220967650414,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.37538695335388184,
"max": 0.9469302892684937,
"mean": 0.5929263234138489,
"std": 0.0680219903588295,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.3926527798175812,
"max": 0.37037163972854614,
"mean": 7.004380313446745e-05,
"std": 0.03718654066324234,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11952866613864899,
"max": 0.1371433585882187,
"mean": 0.0009209888521581888,
"std": 0.029237791895866394,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.6214983463287354,
"max": 0.5109242796897888,
"mean": 1.5226184586936142e-05,
"std": 0.036439333111047745,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.222587585449219,
"max": 8.827320098876953,
"mean": -0.10952811688184738,
"std": 1.7043956518173218,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.2775035798549652,
"max": 0.24042560160160065,
"mean": 5.222904292168096e-05,
"std": 0.03261308744549751,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.05175856128334999,
"max": 0.03964223712682724,
"mean": 9.375870286021382e-05,
"std": 0.012972756288945675,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.23131398856639862,
"max": 0.2357378751039505,
"mean": -2.203516305598896e-05,
"std": 0.02938969060778618,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.2051505148410797,
"max": 0.10573741793632507,
"mean": -0.0040251207537949085,
"std": 0.032664697617292404,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.3397069573402405,
"max": 1.01918625831604,
"mean": 0.7008247375488281,
"std": 0.0969780907034874,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5670483708381653,
"max": 0.8365305662155151,
"mean": 0.00041504879482090473,
"std": 0.042294830083847046,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.2130415141582489,
"max": 0.029987983405590057,
"mean": -0.03220636397600174,
"std": 0.02657567895948887,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7582250833511353,
"max": 0.7219672799110413,
"mean": -1.576655267854221e-05,
"std": 0.03683546185493469,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.26458415389060974,
"max": 0.10674209892749786,
"mean": -0.003017352893948555,
"std": 0.02890385128557682,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.28402721881866455,
"max": 0.6998150944709778,
"mean": 0.49963071942329407,
"std": 0.04700654000043869,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.27952155470848083,
"max": 0.23467987775802612,
"mean": -0.00011085892765549943,
"std": 0.038757603615522385,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.15429016947746277,
"max": 0.12700684368610382,
"mean": -0.002232399070635438,
"std": 0.033386100083589554,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.41612547636032104,
"max": 0.6611561179161072,
"mean": -1.8461763829691336e-05,
"std": 0.03909667953848839,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.2564592361450195,
"max": 4.743135929107666,
"mean": -0.020397484302520752,
"std": 1.0097577571868896,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.2459408938884735,
"max": 0.2083207219839096,
"mean": 4.4360454921843484e-05,
"std": 0.03396270051598549,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.03462521731853485,
"max": 0.045053571462631226,
"mean": -2.1719199139624834e-05,
"std": 0.012641450390219688,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.20202401280403137,
"max": 0.20743757486343384,
"mean": -2.9260227165650576e-05,
"std": 0.031020890921354294,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.20072369277477264,
"max": 0.11369979381561279,
"mean": -0.002900277031585574,
"std": 0.03456325829029083,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.3669256269931793,
"max": 1.064845323562622,
"mean": 0.6706051230430603,
"std": 0.06665434688329697,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.4000990390777588,
"max": 0.5037862062454224,
"mean": -3.870507498504594e-05,
"std": 0.04113040864467621,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.12917247414588928,
"max": 0.026963019743561745,
"mean": -0.030557911843061447,
"std": 0.021937619894742966,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.4511619806289673,
"max": 0.4353387653827667,
"mean": 7.546078268205747e-05,
"std": 0.03489077836275101,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.26869964599609375,
"max": 0.07339140772819519,
"mean": -0.0010946399997919798,
"std": 0.023160062730312347,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.2875079810619354,
"max": 0.6899884343147278,
"mean": 0.5247476696968079,
"std": 0.04796215519309044,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.22366264462471008,
"max": 0.2245350182056427,
"mean": 1.589955536474008e-05,
"std": 0.038949232548475266,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.13696447014808655,
"max": 0.10982562601566315,
"mean": 0.0002473338390700519,
"std": 0.029272515326738358,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.37620943784713745,
"max": 0.4390593469142914,
"mean": -9.372964996146038e-06,
"std": 0.039287250488996506,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.8626632690429688,
"max": 5.021180629730225,
"mean": 0.009756950661540031,
"std": 0.8471038937568665,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.2235114425420761,
"max": 0.2212144434452057,
"mean": -3.48434696206823e-07,
"std": 0.03441031649708748,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.04396739602088928,
"max": 0.03608814626932144,
"mean": -0.00025925497175194323,
"std": 0.012080671265721321,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.2138509899377823,
"max": 0.18955761194229126,
"mean": -1.6947185940807685e-05,
"std": 0.03153672814369202,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.18172238767147064,
"max": 0.12127514183521271,
"mean": -0.0023971181362867355,
"std": 0.04130159318447113,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.42289772629737854,
"max": 0.9483197927474976,
"mean": 0.6628358364105225,
"std": 0.05716627463698387,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.37180185317993164,
"max": 0.47763875126838684,
"mean": -8.19972192402929e-05,
"std": 0.040889617055654526,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.209408238530159,
"max": 0.027359697967767715,
"mean": -0.0302574522793293,
"std": 0.021417709067463875,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.3422113060951233,
"max": 0.7372819185256958,
"mean": 8.242652984336019e-05,
"std": 0.034766409546136856,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.2412174493074417,
"max": 0.05068235844373703,
"mean": -0.0011914315400645137,
"std": 0.020485328510403633,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.30587607622146606,
"max": 0.6579968333244324,
"mean": 0.5253006219863892,
"std": 0.0464390330016613,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.30547264218330383,
"max": 0.21810249984264374,
"mean": 6.997188756940886e-05,
"std": 0.039497073739767075,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.14979463815689087,
"max": 0.13157697021961212,
"mean": 0.00032728962833061814,
"std": 0.030529892072081566,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.25832319259643555,
"max": 0.20298458635807037,
"mean": 3.122862472082488e-05,
"std": 0.039488088339567184,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.3464906215667725,
"max": 2.3862874507904053,
"mean": -0.0262940414249897,
"std": 0.45072564482688904,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.18955294787883759,
"max": 0.211393803358078,
"mean": 3.7051289837108925e-05,
"std": 0.03479388728737831,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.03182046860456467,
"max": 0.03580700233578682,
"mean": -0.0001974685292225331,
"std": 0.012292041443288326,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.18930117785930634,
"max": 0.17112135887145996,
"mean": -6.836307875346392e-05,
"std": 0.03217054903507233,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.14002393186092377,
"max": 0.1378386914730072,
"mean": -0.0025169737637043,
"std": 0.05131695047020912,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.4669981598854065,
"max": 0.9623145461082458,
"mean": 0.669116199016571,
"std": 0.053326528519392014,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.324962854385376,
"max": 0.3098026514053345,
"mean": -9.876448530121706e-07,
"std": 0.0409456230700016,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.12541106343269348,
"max": 0.025640888139605522,
"mean": -0.030711790546774864,
"std": 0.019869431853294373,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.44164079427719116,
"max": 0.4474758803844452,
"mean": 9.588097600499168e-05,
"std": 0.03511932119727135,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.2256106585264206,
"max": 0.052044421434402466,
"mean": -0.0011865352280437946,
"std": 0.018494844436645508,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.33912554383277893,
"max": 0.7450283169746399,
"mean": 0.558834433555603,
"std": 0.041677191853523254,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.27382639050483704,
"max": 0.27962929010391235,
"mean": 2.034128556260839e-05,
"std": 0.0410577729344368,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.13741885125637054,
"max": 0.14038565754890442,
"mean": 0.0004929338465444744,
"std": 0.02668425627052784,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.49240002036094666,
"max": 0.35733160376548767,
"mean": 8.901266846805811e-05,
"std": 0.04069547727704048,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.3072962760925293,
"max": 1.7529240846633911,
"mean": -0.021147169172763824,
"std": 0.5008938312530518,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.21894769370555878,
"max": 0.19816064834594727,
"mean": -4.0161168726626784e-05,
"std": 0.03423343971371651,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.04133184999227524,
"max": 0.03901350870728493,
"mean": -0.00013613827468361706,
"std": 0.012887353077530861,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.17847225069999695,
"max": 0.1837986409664154,
"mean": 4.7998124500736594e-05,
"std": 0.031556759029626846,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.180707648396492,
"max": 0.18469232320785522,
"mean": -0.0022159582003951073,
"std": 0.05485893413424492,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.4741988480091095,
"max": 1.0330065488815308,
"mean": 0.6454803347587585,
"std": 0.05105094239115715,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.2723560929298401,
"max": 0.3096334636211395,
"mean": 0.00011242127220612019,
"std": 0.040681805461645126,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.10577475279569626,
"max": 0.026752889156341553,
"mean": -0.029537281021475792,
"std": 0.01797310821712017,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.3403210937976837,
"max": 0.33086487650871277,
"mean": 5.282106576487422e-05,
"std": 0.034412968903779984,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.18259213864803314,
"max": 0.04268056899309158,
"mean": -0.0010635886574164033,
"std": 0.017230909317731857,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.32514795660972595,
"max": 0.6914159655570984,
"mean": 0.5113943219184875,
"std": 0.03739636018872261,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.2348308116197586,
"max": 0.22631730139255524,
"mean": -3.621048017521389e-05,
"std": 0.0391756109893322,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.11563856154680252,
"max": 0.13239268958568573,
"mean": 0.00015192970749922097,
"std": 0.029222996905446053,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.35409149527549744,
"max": 0.2863385081291199,
"mean": 6.707018656015862e-06,
"std": 0.03924466669559479,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.1504130363464355,
"max": 3.5592541694641113,
"mean": -0.011647488921880722,
"std": 0.6845048069953918,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.21134592592716217,
"max": 0.21000461280345917,
"mean": 3.47579552908428e-05,
"std": 0.03448459133505821,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.036000702530145645,
"max": 0.04817511513829231,
"mean": 0.0007898924523033202,
"std": 0.012873834930360317,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.2113579511642456,
"max": 0.19389942288398743,
"mean": -1.0706971806939691e-06,
"std": 0.0316954106092453,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.1872350424528122,
"max": 0.1779821664094925,
"mean": -0.002844380447641015,
"std": 0.058656178414821625,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.4746103286743164,
"max": 1.0489076375961304,
"mean": 0.6516687870025635,
"std": 0.05057830363512039,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.24878337979316711,
"max": 0.3296516239643097,
"mean": 0.00018073963292408735,
"std": 0.04057016968727112,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.12595486640930176,
"max": 0.02493392489850521,
"mean": -0.030515050515532494,
"std": 0.01764742285013199,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.4225960969924927,
"max": 0.4839133322238922,
"mean": 1.030291969073005e-06,
"std": 0.035397231578826904,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.1520412415266037,
"max": 0.043631311506032944,
"mean": 4.209935286780819e-05,
"std": 0.014901721850037575,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.31559863686561584,
"max": 0.686523973941803,
"mean": 0.553006649017334,
"std": 0.040904585272073746,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.20726847648620605,
"max": 0.22089692950248718,
"mean": 3.191033465554938e-05,
"std": 0.03829946741461754,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.13833385705947876,
"max": 0.11308565735816956,
"mean": 2.6655456167645752e-05,
"std": 0.025857754051685333,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.4046614170074463,
"max": 0.37271684408187866,
"mean": 2.56894181802636e-05,
"std": 0.0381796769797802,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.7873597145080566,
"max": 2.881237506866455,
"mean": 0.0011979229748249054,
"std": 0.5181517601013184,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.20434829592704773,
"max": 0.19823738932609558,
"mean": 2.9684193577850237e-05,
"std": 0.03429735451936722,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.050780050456523895,
"max": 0.040064383298158646,
"mean": -0.00042128204950131476,
"std": 0.01341989729553461,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.1970871537923813,
"max": 0.20266157388687134,
"mean": -1.2426969988155179e-05,
"std": 0.031805407255887985,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.1938190907239914,
"max": 0.19595396518707275,
"mean": -0.0029727788642048836,
"std": 0.06256895512342453,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.34895268082618713,
"max": 1.0913121700286865,
"mean": 0.6674203276634216,
"std": 0.056132975965738297,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22646191716194153,
"max": 0.25265538692474365,
"mean": 0.0003584488877095282,
"std": 0.040759678930044174,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.09146817028522491,
"max": 0.04364684969186783,
"mean": -0.030097611248493195,
"std": 0.017646051943302155,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.35469669103622437,
"max": 0.30548718571662903,
"mean": -4.469315172173083e-05,
"std": 0.03712276369333267,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.1623995155096054,
"max": 0.06374479830265045,
"mean": -8.042766421567649e-05,
"std": 0.01944616436958313,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.34871119260787964,
"max": 0.7271286249160767,
"mean": 0.5425379872322083,
"std": 0.03944627195596695,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.2201070785522461,
"max": 0.2242431491613388,
"mean": -1.1387233826098964e-05,
"std": 0.03923100233078003,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.11890711635351181,
"max": 0.1713198721408844,
"mean": 0.0002833662729244679,
"std": 0.025163158774375916,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.24783332645893097,
"max": 0.30217495560646057,
"mean": -3.6862991692032665e-05,
"std": 0.038930460810661316,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.520315170288086,
"max": 3.7306737899780273,
"mean": 0.015852145850658417,
"std": 0.7850235104560852,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.21981129050254822,
"max": 0.23816066980361938,
"mean": -1.3107633094477933e-05,
"std": 0.036303482949733734,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.04740596562623978,
"max": 0.05159047618508339,
"mean": 0.000481397844851017,
"std": 0.013528619892895222,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.2151964157819748,
"max": 0.21832282841205597,
"mean": 5.642603355227038e-05,
"std": 0.03361587971448898,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.2122570425271988,
"max": 0.23222938179969788,
"mean": -0.005098365712910891,
"std": 0.06190234050154686,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.36193394660949707,
"max": 1.1087924242019653,
"mean": 0.6995820999145508,
"std": 0.05450976639986038,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.23606520891189575,
"max": 0.24584993720054626,
"mean": 0.00046336432569660246,
"std": 0.041269052773714066,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.09852692484855652,
"max": 0.06841564178466797,
"mean": -0.0314490832388401,
"std": 0.01816665753722191,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.30322569608688354,
"max": 0.3532632291316986,
"mean": -8.268543751910329e-05,
"std": 0.04027474299073219,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.15293245017528534,
"max": 0.1503082662820816,
"mean": 0.0002610071678645909,
"std": 0.023066464811563492,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 0.9987825155258179,
"max": 1.011022686958313,
"mean": 1.0016167163848877,
"std": 0.004121079575270414,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.03126484900712967,
"max": 0.03125990182161331,
"mean": -1.9292880097054876e-05,
"std": 0.0180410947650671,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.031222796067595482,
"max": 0.030990226194262505,
"mean": -0.001084181945770979,
"std": 0.017950553447008133,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.03126567602157593,
"max": 0.031269483268260956,
"mean": 3.546300376910949e-06,
"std": 0.018041500821709633,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.03114791214466095,
"max": 0.03117155283689499,
"mean": 0.0003340535331517458,
"std": 0.018062960356473923,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": -0.0005971609498374164,
"max": 0.0006745979771949351,
"mean": 4.374485797598027e-06,
"std": 0.0001794710842659697,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 0.9978547096252441,
"max": 1.0122681856155396,
"mean": 1.0009429454803467,
"std": 0.0034361695870757103,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.03340178728103638,
"max": 0.033508703112602234,
"mean": -6.2318931668414734e-06,
"std": 0.01804722100496292,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.03293577954173088,
"max": 0.03327555954456329,
"mean": -0.00015042479208204895,
"std": 0.017954858019948006,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": -0.00139134272467345,
"max": 0.0014818700728937984,
"mean": 1.7994759673456429e-06,
"std": 0.0002722168283071369,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": -0.0005520335980691016,
"max": 0.0007331477245315909,
"mean": 7.149023986130487e-06,
"std": 0.0001629332109587267,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.3833008110523224,
"max": 0.7242851853370667,
"mean": 0.5809347033500671,
"std": 0.039344511926174164,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.2398604303598404,
"max": 0.19741135835647583,
"mean": 2.61208933807211e-05,
"std": 0.037466324865818024,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.1193285658955574,
"max": 0.16746975481510162,
"mean": 0.0009843853767961264,
"std": 0.027611562982201576,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.24755319952964783,
"max": 0.5020493268966675,
"mean": -5.023340054322034e-05,
"std": 0.037623729556798935,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.959080934524536,
"max": 3.785468339920044,
"mean": -0.003608043771237135,
"std": 0.6828969120979309,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.2280745655298233,
"max": 0.25265711545944214,
"mean": -1.1726486263796687e-05,
"std": 0.037434931844472885,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.07189386337995529,
"max": 0.08095899969339371,
"mean": -0.0005116118700243533,
"std": 0.015669817104935646,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.22852574288845062,
"max": 0.2589001953601837,
"mean": -2.8789245334337465e-05,
"std": 0.035421740263700485,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.20139215886592865,
"max": 0.21579185128211975,
"mean": -0.005532890558242798,
"std": 0.06838470697402954,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.40495166182518005,
"max": 1.1977423429489136,
"mean": 0.7382426857948303,
"std": 0.05618907883763313,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.22189897298812866,
"max": 0.24627524614334106,
"mean": 0.0005210949457250535,
"std": 0.0413360670208931,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.10370241105556488,
"max": 0.024191563948988914,
"mean": -0.03269057348370552,
"std": 0.018939778208732605,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.45156151056289673,
"max": 0.42444875836372375,
"mean": -0.00043494877172634006,
"std": 0.046896398067474365,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.25261297821998596,
"max": 0.47218039631843567,
"mean": 0.0032064011320471764,
"std": 0.0446014478802681,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.3172667622566223,
"max": 0.33354270458221436,
"mean": -2.519888585084118e-05,
"std": 0.021287826821208,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.3245178461074829,
"max": 0.6904165148735046,
"mean": 0.5711733102798462,
"std": 0.04502657428383827,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.16521431505680084,
"max": 0.1752052754163742,
"mean": -4.8754882300272584e-05,
"std": 0.033182479441165924,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.18773159384727478,
"max": 0.14384877681732178,
"mean": 3.672283492051065e-05,
"std": 0.02975340373814106,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.38243839144706726,
"max": 0.24725475907325745,
"mean": -9.841056453296915e-06,
"std": 0.03276367485523224,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.6714818477630615,
"max": 3.3041720390319824,
"mean": -0.014343326911330223,
"std": 0.9862688779830933,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.23551921546459198,
"max": 0.24833251535892487,
"mean": -1.8171514966525137e-05,
"std": 0.041698355227708817,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.07285058498382568,
"max": 0.1551419198513031,
"mean": 0.0006671739974990487,
"std": 0.02518472634255886,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.26684004068374634,
"max": 0.2486322820186615,
"mean": -1.5217347026919015e-05,
"std": 0.040139369666576385,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.19041800498962402,
"max": 0.19548022747039795,
"mean": -0.001239710720255971,
"std": 0.06670945882797241,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.3291718661785126,
"max": 1.0067707300186157,
"mean": 0.7195272445678711,
"std": 0.053192976862192154,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.23261909186840057,
"max": 0.24629585444927216,
"mean": 0.0001829106913646683,
"std": 0.04090041667222977,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11500220745801926,
"max": 0.01902289316058159,
"mean": -0.042502518743276596,
"std": 0.01891784742474556,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.3915143311023712,
"max": 0.4093465507030487,
"mean": -2.1941355953458697e-05,
"std": 0.04853365942835808,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.6959867477416992,
"max": 0.41447487473487854,
"mean": 0.0008487096056342125,
"std": 0.06040440872311592,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": -0.0013131406158208847,
"max": 1.000697135925293,
"mean": 0.00048820037045516074,
"std": 0.022089475765824318,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 0.9987786412239075,
"max": 1.0108789205551147,
"mean": 1.0015242099761963,
"std": 0.003978394437581301,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.03125932812690735,
"max": 0.031260255724191666,
"mean": -2.101710924762301e-05,
"std": 0.018032435327768326,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.031216789036989212,
"max": 0.0312344953417778,
"mean": -0.0006770212785340846,
"std": 0.017827019095420837,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.03126361221075058,
"max": 0.03126442804932594,
"mean": -8.826009434415027e-06,
"std": 0.018031461164355278,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.031229715794324875,
"max": 0.031247057020664215,
"mean": -0.0007297845440916717,
"std": 0.01794196106493473,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": -0.0004946183180436492,
"max": 0.00040109679684974253,
"mean": -3.799516889557708e-06,
"std": 0.00014799994823988527,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 0.9972319006919861,
"max": 1.0116411447525024,
"mean": 1.0005743503570557,
"std": 0.0034592244774103165,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.03315997123718262,
"max": 0.032729245722293854,
"mean": -2.570214064689935e-06,
"std": 0.018028665333986282,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.03235220909118652,
"max": 0.03128715977072716,
"mean": -0.00045961630530655384,
"std": 0.018038177862763405,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": -0.0016143623506650329,
"max": 0.001427292707376182,
"mean": -1.0927603852906032e-06,
"std": 0.00026996160158887506,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": -0.00045358933857642114,
"max": 0.00036658692988567054,
"mean": -3.5024249882553704e-06,
"std": 0.0001358992449240759,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.23466038703918457,
"max": 0.2728899419307709,
"mean": 6.680695605609799e-06,
"std": 0.018810251727700233,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.3215275704860687,
"max": 0.6988651752471924,
"mean": 0.5818086862564087,
"std": 0.04628920555114746,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.18249788880348206,
"max": 0.1985490918159485,
"mean": -1.1619875294854864e-05,
"std": 0.0331842340528965,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.16120854020118713,
"max": 0.12988702952861786,
"mean": -0.0010746754705905914,
"std": 0.034188635647296906,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.3333602249622345,
"max": 0.31210559606552124,
"mean": -1.0246277270198334e-05,
"std": 0.03223477676510811,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.836638927459717,
"max": 8.800041198730469,
"mean": 0.09370891749858856,
"std": 1.6243042945861816,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.23471659421920776,
"max": 0.24255934357643127,
"mean": 4.1660623537609354e-05,
"std": 0.04085636883974075,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.07628928869962692,
"max": 0.06604960560798645,
"mean": 0.0004821753827854991,
"std": 0.01943657174706459,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.24707570672035217,
"max": 0.2350512593984604,
"mean": -3.330966137582436e-06,
"std": 0.03943110629916191,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.16370660066604614,
"max": 0.16159522533416748,
"mean": 0.0016214787028729916,
"std": 0.06530040502548218,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.556998610496521,
"max": 0.9505069851875305,
"mean": 0.7131754159927368,
"std": 0.04095931351184845,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.22923102974891663,
"max": 0.25587573647499084,
"mean": -4.568279109662399e-05,
"std": 0.040574610233306885,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.13533200323581696,
"max": 0.022116411477327347,
"mean": -0.041375163942575455,
"std": 0.018435189500451088,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.42361417412757874,
"max": 0.39315521717071533,
"mean": -4.420744517119601e-06,
"std": 0.047783900052309036,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.6098850965499878,
"max": 0.6541793942451477,
"mean": 0.001589474268257618,
"std": 0.056938592344522476,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.2520405650138855,
"max": 0.3211195170879364,
"mean": -6.1747768995701335e-06,
"std": 0.019613485783338547,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.35947033762931824,
"max": 0.6870434284210205,
"mean": 0.5708057880401611,
"std": 0.04320356622338295,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.22096499800682068,
"max": 0.1776382476091385,
"mean": -3.44411309924908e-05,
"std": 0.034298770129680634,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.16386361420154572,
"max": 0.23379802703857422,
"mean": 0.0003647217818070203,
"std": 0.032876912504434586,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.2648993730545044,
"max": 0.2407570779323578,
"mean": -5.283746577333659e-05,
"std": 0.03389748930931091,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.875531196594238,
"max": 5.112789630889893,
"mean": 0.04403312876820564,
"std": 1.231998324394226,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.24717208743095398,
"max": 0.2512055039405823,
"mean": 7.22141849109903e-05,
"std": 0.043986547738313675,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.06276638805866241,
"max": 0.054656121879816055,
"mean": 0.0006459522992372513,
"std": 0.017198164016008377,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.2877632677555084,
"max": 0.2726806104183197,
"mean": -5.0024795200442895e-05,
"std": 0.042984671890735626,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.16170376539230347,
"max": 0.1710934340953827,
"mean": -0.0028864555060863495,
"std": 0.05931045860052109,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.51991868019104,
"max": 0.9398472905158997,
"mean": 0.7137647867202759,
"std": 0.03922666609287262,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.23831400275230408,
"max": 0.2492961287498474,
"mean": 0.00046471404493786395,
"std": 0.040453460067510605,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.14562036097049713,
"max": 0.04111756384372711,
"mean": -0.039718322455883026,
"std": 0.02059181034564972,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.5351076126098633,
"max": 0.5854408740997314,
"mean": 5.962188879493624e-06,
"std": 0.0488593615591526,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5212635397911072,
"max": 0.4954894483089447,
"mean": 0.0023677186109125614,
"std": 0.05354826897382736,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.27395325899124146,
"max": 0.31585943698883057,
"mean": 1.8985367660206975e-06,
"std": 0.020050065591931343,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.3660656809806824,
"max": 0.7167491316795349,
"mean": 0.593307375907898,
"std": 0.04627520218491554,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.21157211065292358,
"max": 0.19981449842453003,
"mean": 3.063139592995867e-05,
"std": 0.03486718237400055,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.1879485547542572,
"max": 0.2043510377407074,
"mean": 0.0009530138340778649,
"std": 0.031568389385938644,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.29089149832725525,
"max": 0.341105580329895,
"mean": -4.692538641393185e-05,
"std": 0.03458765521645546,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.893813371658325,
"max": 3.4017703533172607,
"mean": 0.014513500966131687,
"std": 0.8598799705505371,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.22526344656944275,
"max": 0.250789076089859,
"mean": -3.7296154005161952e-06,
"std": 0.042229536920785904,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.05549817904829979,
"max": 0.046731892973184586,
"mean": -2.1666113752871752e-05,
"std": 0.0158494021743536,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.29372450709342957,
"max": 0.2908160388469696,
"mean": -7.59748127165949e-06,
"std": 0.041944604367017746,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.12536406517028809,
"max": 0.2601471245288849,
"mean": -0.0032426435500383377,
"std": 0.05318090319633484,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.45628464221954346,
"max": 0.8507043719291687,
"mean": 0.7057910561561584,
"std": 0.03590774908661842,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.5123029351234436,
"max": 0.34838762879371643,
"mean": 0.0003429077914915979,
"std": 0.04019884020090103,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.1866319328546524,
"max": 0.039536003023386,
"mean": -0.03940858319401741,
"std": 0.021406862884759903,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.5465707778930664,
"max": 0.5584931969642639,
"mean": -7.126475975383073e-05,
"std": 0.050734348595142365,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.5138925909996033,
"max": 0.6670938730239868,
"mean": 0.0024418262764811516,
"std": 0.04960782080888748,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.33276569843292236,
"max": 0.26628994941711426,
"mean": 3.292404471721966e-06,
"std": 0.01938711293041706,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.3219706416130066,
"max": 0.7718862295150757,
"mean": 0.651161789894104,
"std": 0.04554183781147003,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.2507328987121582,
"max": 0.22062398493289948,
"mean": -2.0154016056039836e-06,
"std": 0.03650148585438728,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.3283964991569519,
"max": 0.2880261540412903,
"mean": -0.0006875221151858568,
"std": 0.038663797080516815,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.3113596737384796,
"max": 0.37169572710990906,
"mean": 6.504646444227546e-05,
"std": 0.03624209389090538,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.737742900848389,
"max": 5.83281946182251,
"mean": 0.03801126033067703,
"std": 1.4163931608200073,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.2227693796157837,
"max": 0.2069622278213501,
"mean": -7.526973786298186e-05,
"std": 0.042485106736421585,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.07798711210489273,
"max": 0.05173616483807564,
"mean": -0.0009264935506507754,
"std": 0.016420088708400726,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.3309612274169922,
"max": 0.3296358287334442,
"mean": -4.774779426952591e-06,
"std": 0.04279141128063202,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.28600984811782837,
"max": 0.11250722408294678,
"mean": -0.0012054404942318797,
"std": 0.04702861234545708,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.4860813617706299,
"max": 0.8933811783790588,
"mean": 0.7376744747161865,
"std": 0.038892824202775955,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.36275342106819153,
"max": 0.2756327986717224,
"mean": 5.113358929520473e-05,
"std": 0.04064434394240379,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.2486657202243805,
"max": 0.046376701444387436,
"mean": -0.03928756341338158,
"std": 0.023350302129983902,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.6290910840034485,
"max": 0.5994174480438232,
"mean": -6.010006836731918e-05,
"std": 0.0531165786087513,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.712557315826416,
"max": 0.26695698499679565,
"mean": 0.000916715245693922,
"std": 0.051312319934368134,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.3435560464859009,
"max": 0.3038403391838074,
"mean": 2.054806600426673e-07,
"std": 0.01913570426404476,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.34980928897857666,
"max": 0.7884078621864319,
"mean": 0.6389412879943848,
"std": 0.04949204996228218,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.2064303159713745,
"max": 0.2077268660068512,
"mean": -5.987969052512199e-05,
"std": 0.03769605979323387,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.25974684953689575,
"max": 0.26921483874320984,
"mean": -0.000399288343032822,
"std": 0.04469470679759979,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.35545018315315247,
"max": 0.32378923892974854,
"mean": -6.928052243893035e-06,
"std": 0.03720466047525406,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.283975601196289,
"max": 4.222393035888672,
"mean": -0.0264443326741457,
"std": 1.0090056657791138,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.23976586759090424,
"max": 0.24442994594573975,
"mean": -2.508235047571361e-05,
"std": 0.04320976510643959,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.06259545683860779,
"max": 0.0569254532456398,
"mean": 0.00034189436701126397,
"std": 0.014161717146635056,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.4372391402721405,
"max": 0.37368500232696533,
"mean": 1.4562616343027912e-05,
"std": 0.044121067970991135,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.09685619175434113,
"max": 0.17668433487415314,
"mean": -0.0006592039717361331,
"std": 0.035167545080184937,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.42172640562057495,
"max": 1.0772342681884766,
"mean": 0.7485133409500122,
"std": 0.04247161000967026,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.26711001992225647,
"max": 0.2980104982852936,
"mean": -7.953734166221693e-05,
"std": 0.04080444946885109,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.18652470409870148,
"max": 0.04387153312563896,
"mean": -0.03684595599770546,
"std": 0.025674043223261833,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.4576263427734375,
"max": 0.488967627286911,
"mean": 4.3991476559313014e-05,
"std": 0.05420954152941704,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.287752240896225,
"max": 0.5537111759185791,
"mean": -0.0008832515450194478,
"std": 0.0479048416018486,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.29307857155799866,
"max": 0.32305020093917847,
"mean": 6.496340574813075e-06,
"std": 0.01996980607509613,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.29093778133392334,
"max": 0.7654404640197754,
"mean": 0.6508903503417969,
"std": 0.05225415527820587,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.2440621256828308,
"max": 0.26225581765174866,
"mean": -5.966384833300253e-06,
"std": 0.03961286321282387,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.268706738948822,
"max": 0.20074717700481415,
"mean": -0.0008819116046652198,
"std": 0.05185216665267944,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.2733410894870758,
"max": 0.2549380958080292,
"mean": 4.216280103719328e-06,
"std": 0.03870992735028267,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -13.020317077636719,
"max": 16.015220642089844,
"mean": 0.033375781029462814,
"std": 1.9953062534332275,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.2079249769449234,
"max": 0.22674520313739777,
"mean": -7.217413804028183e-05,
"std": 0.04055381566286087,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.06965012848377228,
"max": 0.06350152939558029,
"mean": 0.00015418700058944523,
"std": 0.014755439944565296,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.4655463695526123,
"max": 0.3209993243217468,
"mean": 1.953401260834653e-05,
"std": 0.04058877378702164,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.06434516608715057,
"max": 0.1157260537147522,
"mean": 0.001194344600662589,
"std": 0.02471684291958809,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.37466296553611755,
"max": 0.9391067624092102,
"mean": 0.7509991526603699,
"std": 0.04050418362021446,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.28077101707458496,
"max": 0.274548202753067,
"mean": -0.00016862244228832424,
"std": 0.04099500924348831,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.19967925548553467,
"max": 0.0508696548640728,
"mean": -0.03204797953367233,
"std": 0.025167953222990036,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.6607509851455688,
"max": 0.5379750728607178,
"mean": -4.8667719966033474e-05,
"std": 0.052846137434244156,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.1939390003681183,
"max": 0.584657609462738,
"mean": -0.0005122774746268988,
"std": 0.041145551949739456,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.41793951392173767,
"max": 0.37214717268943787,
"mean": 6.048314844520064e-06,
"std": 0.02162175066769123,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.21421198546886444,
"max": 0.7522769570350647,
"mean": 0.6496115922927856,
"std": 0.054447393864393234,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.21056805551052094,
"max": 0.1966959536075592,
"mean": 4.008851828984916e-05,
"std": 0.039464544504880905,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.33072784543037415,
"max": 0.26050281524658203,
"mean": -0.003235320094972849,
"std": 0.056362900882959366,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.20648598670959473,
"max": 0.2557448148727417,
"mean": 5.435877392301336e-05,
"std": 0.038566704839468,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.270581245422363,
"max": 6.962486743927002,
"mean": 0.048468317836523056,
"std": 1.3885526657104492,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.21042834222316742,
"max": 0.23116129636764526,
"mean": -5.202562988415593e-06,
"std": 0.04131306707859039,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.044061992317438126,
"max": 0.03610403463244438,
"mean": 4.031957359984517e-06,
"std": 0.012803297489881516,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.39820992946624756,
"max": 0.3451625406742096,
"mean": -5.5655600590398535e-05,
"std": 0.04238949343562126,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.05527956411242485,
"max": 0.06314276903867722,
"mean": 0.00036968549829907715,
"std": 0.01868215762078762,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.3502121865749359,
"max": 1.0526388883590698,
"mean": 0.789475679397583,
"std": 0.049056656658649445,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.333749383687973,
"max": 0.386434406042099,
"mean": -0.00016950398276094347,
"std": 0.04148067533969879,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.15795546770095825,
"max": 0.05914008617401123,
"mean": -0.031855080276727676,
"std": 0.025188777595758438,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6976608633995056,
"max": 0.4709860682487488,
"mean": -9.084228804567829e-05,
"std": 0.051792342215776443,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.24932992458343506,
"max": 0.3299875855445862,
"mean": -0.00024624879006296396,
"std": 0.04149326682090759,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.2875395119190216,
"max": 0.3506205677986145,
"mean": -2.1794317035528366e-06,
"std": 0.02423883229494095,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.19665004312992096,
"max": 0.7845895886421204,
"mean": 0.6703099608421326,
"std": 0.05872485041618347,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.22986678779125214,
"max": 0.23209868371486664,
"mean": -1.9775907276198268e-05,
"std": 0.040440451353788376,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.22065043449401855,
"max": 0.2417624443769455,
"mean": 0.0007816089782863855,
"std": 0.05589631199836731,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.21658743917942047,
"max": 0.22758929431438446,
"mean": -7.156423816923052e-05,
"std": 0.03937661275267601,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.943953514099121,
"max": 9.107547760009766,
"mean": -0.0012157298624515533,
"std": 1.8536982536315918,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.2707418203353882,
"max": 0.2602587938308716,
"mean": 4.357028228696436e-05,
"std": 0.03840764984488487,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.05789529159665108,
"max": 0.05795900523662567,
"mean": 0.0003505878266878426,
"std": 0.014736429788172245,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.2662392258644104,
"max": 0.2892150580883026,
"mean": -6.152272544568405e-05,
"std": 0.03907401114702225,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.04396943002939224,
"max": 0.037484679371118546,
"mean": -8.678687299834564e-05,
"std": 0.013375459238886833,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.3395363390445709,
"max": 1.100338101387024,
"mean": 0.863823413848877,
"std": 0.06409083306789398,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.423621267080307,
"max": 0.4195392429828644,
"mean": 0.0003127713571302593,
"std": 0.04350290074944496,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.21570223569869995,
"max": 0.17136934399604797,
"mean": -0.029504353180527687,
"std": 0.032010503113269806,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.602144181728363,
"max": 0.5620326995849609,
"mean": -0.00015219957276713103,
"std": 0.05344673991203308,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17926719784736633,
"max": 0.37834614515304565,
"mean": 0.0013675567461177707,
"std": 0.037359848618507385,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.39466091990470886,
"max": 0.36930760741233826,
"mean": 3.647102857939899e-05,
"std": 0.028620684519410133,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.2902662754058838,
"max": 0.832281231880188,
"mean": 0.7056034207344055,
"std": 0.06793806701898575,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.9263004064559937,
"max": 1.0266234874725342,
"mean": -2.5708328394102864e-05,
"std": 0.04762601479887962,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.8822629451751709,
"max": 0.8186339139938354,
"mean": -0.00031781112193129957,
"std": 0.09582255780696869,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.27002349495887756,
"max": 0.24192620813846588,
"mean": -2.2872980480315164e-05,
"std": 0.03895563259720802,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.84510040283203,
"max": 22.94961166381836,
"mean": -0.09204111993312836,
"std": 4.085866928100586,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.22870811820030212,
"max": 0.24587669968605042,
"mean": -2.573069286881946e-05,
"std": 0.03863922879099846,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.06067140772938728,
"max": 0.046225275844335556,
"mean": -0.0001460441417293623,
"std": 0.014704843983054161,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.3391576111316681,
"max": 0.3760104775428772,
"mean": 7.383272532024421e-06,
"std": 0.040815357118844986,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.04665788635611534,
"max": 0.19654953479766846,
"mean": 0.0002728282706812024,
"std": 0.013587887398898602,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.37436628341674805,
"max": 1.138013482093811,
"mean": 0.8901113271713257,
"std": 0.06415355205535889,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.44819676876068115,
"max": 0.5436740517616272,
"mean": 2.450778629281558e-05,
"std": 0.04556773602962494,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.2250596135854721,
"max": 0.08822774887084961,
"mean": -0.03204711154103279,
"std": 0.0378473699092865,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7285163402557373,
"max": 0.6922004222869873,
"mean": 3.462535823928192e-05,
"std": 0.051778655499219894,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.1753203570842743,
"max": 0.21950407326221466,
"mean": 4.071232979185879e-05,
"std": 0.0318208709359169,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.34123340249061584,
"max": 0.37526530027389526,
"mean": 4.290333163226023e-05,
"std": 0.0341440849006176,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.31759148836135864,
"max": 1.2954586744308472,
"mean": 0.6016563177108765,
"std": 0.08407581597566605,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.2837989628314972,
"max": 0.2609255015850067,
"mean": -3.0735166092199506e-06,
"std": 0.035984087735414505,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.23655052483081818,
"max": 0.2062867432832718,
"mean": 0.0002321804640814662,
"std": 0.05606939643621445,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.436277836561203,
"max": 0.3261794447898865,
"mean": 2.4473378289258108e-05,
"std": 0.03413478285074234,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.569121360778809,
"max": 7.344529628753662,
"mean": -0.007453735917806625,
"std": 0.7020133137702942,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.3451450765132904,
"max": 0.36535224318504333,
"mean": 0.0001032469590427354,
"std": 0.047828368842601776,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.07407404482364655,
"max": 0.06063373386859894,
"mean": 0.0009325749706476927,
"std": 0.014960682019591331,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.25645625591278076,
"max": 0.28786128759384155,
"mean": 4.184576027910225e-06,
"std": 0.041555535048246384,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.05557131767272949,
"max": 0.06310223042964935,
"mean": 0.00014075382205192,
"std": 0.0071859210729599,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.4938402473926544,
"max": 1.2290534973144531,
"mean": 1.0134642124176025,
"std": 0.1175011619925499,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.0939291715621948,
"max": 1.0472568273544312,
"mean": -4.937269113725051e-05,
"std": 0.052410781383514404,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.22465373575687408,
"max": 0.17359215021133423,
"mean": -0.027279244735836983,
"std": 0.0364469476044178,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8881030678749084,
"max": 0.9261159300804138,
"mean": -0.00014599041605833918,
"std": 0.05328277125954628,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.17176949977874756,
"max": 0.3815639615058899,
"mean": 0.003376794047653675,
"std": 0.03997529670596123,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.7789531350135803,
"max": 0.725176990032196,
"mean": 1.8912758605438285e-05,
"std": 0.04616439342498779,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.3386198878288269,
"max": 1.43718421459198,
"mean": 0.9484164714813232,
"std": 0.2068886160850525,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.7457443475723267,
"max": 1.7046759128570557,
"mean": 0.00022706578602083027,
"std": 0.15868695080280304,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.2048320770263672,
"max": 1.1044596433639526,
"mean": -0.009567854925990105,
"std": 0.20464132726192474,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.4219454526901245,
"max": 0.42726483941078186,
"mean": 6.450812361435965e-05,
"std": 0.04801829159259796,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.830074310302734,
"max": 19.624286651611328,
"mean": -0.24912264943122864,
"std": 4.795468807220459,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.32499611377716064,
"max": 0.43987926840782166,
"mean": -1.1840356819448061e-05,
"std": 0.04616156592965126,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.034201864153146744,
"max": 0.03727949783205986,
"mean": 0.0006420350982807577,
"std": 0.012923939153552055,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.7049213647842407,
"max": 0.6658478379249573,
"mean": 4.366881330497563e-05,
"std": 0.057883720844984055,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.07255180925130844,
"max": 0.06780894845724106,
"mean": -0.00013478109030984342,
"std": 0.012948636896908283,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.38018205761909485,
"max": 1.3912252187728882,
"mean": 1.0665678977966309,
"std": 0.21972529590129852,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.6171136498451233,
"max": 0.7182933688163757,
"mean": 0.00011123980220872909,
"std": 0.05802140384912491,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.22050145268440247,
"max": 0.2261514961719513,
"mean": 0.006267528980970383,
"std": 0.04982294142246246,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.6300009489059448,
"max": 0.8896978497505188,
"mean": 1.1602171070990153e-05,
"std": 0.023528659716248512,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.5090406537055969,
"max": 0.47603797912597656,
"mean": -0.003031304571777582,
"std": 0.0695611834526062,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.5378094911575317,
"max": 1.184032917022705,
"mean": 0.7829163670539856,
"std": 0.09918713569641113,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.26840853691101074,
"max": 0.21375010907649994,
"mean": -0.00022396638814825565,
"std": 0.05399699881672859,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23899979889392853,
"max": 0.014829290099442005,
"mean": -0.04399246349930763,
"std": 0.034442439675331116,
"sparsity": 0.0,
"shape": [
100
]
}
}
}