fuvip11 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
c0ef4ea verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.4302421808242798,
"max": 0.29811733961105347,
"mean": -0.0025433888658881187,
"std": 0.04256260767579079,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.06305147707462311,
"max": 0.10753221064805984,
"mean": 0.0006371351191774011,
"std": 0.03406313806772232,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.4127056300640106,
"max": 0.8369134068489075,
"mean": -0.00020153506193310022,
"std": 0.024111680686473846,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.11529576778411865,
"max": 0.32162028551101685,
"mean": -0.0009410998900420964,
"std": 0.019562100991606712,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.7922351360321045,
"max": 2.8709537982940674,
"mean": -0.0003647250996436924,
"std": 0.6154845356941223,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.2792224586009979,
"max": 0.3816443681716919,
"mean": 0.0004239956906531006,
"std": 0.04274846613407135,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.222523033618927,
"max": 0.20966869592666626,
"mean": -0.004486067220568657,
"std": 0.040918223559856415,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.42831921577453613,
"max": 0.4761074483394623,
"mean": 3.883292265527416e-06,
"std": 0.02451084926724434,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.32521355152130127,
"max": 0.15685473382472992,
"mean": -0.04670340567827225,
"std": 0.05158989131450653,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.41050970554351807,
"max": 0.3547350764274597,
"mean": -0.0001308345381403342,
"std": 0.023604650050401688,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.22980044782161713,
"max": 0.26265424489974976,
"mean": -0.02913527563214302,
"std": 0.04935712739825249,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.25461670756340027,
"max": 0.8201668858528137,
"mean": 0.5254921317100525,
"std": 0.08082755655050278,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.29707157611846924,
"max": 0.26584771275520325,
"mean": -0.0004257621185388416,
"std": 0.032102566212415695,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.09281580150127411,
"max": 0.12489211559295654,
"mean": 0.0006475000409409404,
"std": 0.025739654898643494,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.290749192237854,
"max": 0.2813739478588104,
"mean": -7.507578993681818e-05,
"std": 0.030931759625673294,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.900395393371582,
"max": 5.815171718597412,
"mean": -0.009333068504929543,
"std": 1.295695185661316,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.4251696765422821,
"max": 0.3438807427883148,
"mean": 9.805745503399521e-05,
"std": 0.029953517019748688,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.029049167409539223,
"max": 0.027643660083413124,
"mean": -0.00032356681185774505,
"std": 0.012573834508657455,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.4541175961494446,
"max": 0.4482012987136841,
"mean": 2.389368455624208e-05,
"std": 0.023853901773691177,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.08873512595891953,
"max": 0.09103881567716599,
"mean": 0.0022877324372529984,
"std": 0.019517814740538597,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.2668094336986542,
"max": 1.0562759637832642,
"mean": 0.5312086343765259,
"std": 0.10443899780511856,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.5745095610618591,
"max": 0.6083298325538635,
"mean": -0.0004305951879359782,
"std": 0.038600798696279526,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.1827721893787384,
"max": 0.04561286419630051,
"mean": -0.029457518830895424,
"std": 0.042618319392204285,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.167069435119629,
"max": 1.6338956356048584,
"mean": 0.0003232666349504143,
"std": 0.02769671194255352,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.1623232066631317,
"max": 0.20567050576210022,
"mean": -0.021127892658114433,
"std": 0.027942020446062088,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.22407177090644836,
"max": 0.843936026096344,
"mean": 0.4876656234264374,
"std": 0.07522594183683395,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.2555537223815918,
"max": 0.3058427572250366,
"mean": -6.734902854077518e-06,
"std": 0.033475104719400406,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.09539701044559479,
"max": 0.11051826924085617,
"mean": 6.649381248280406e-05,
"std": 0.026965470984578133,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.2971154749393463,
"max": 0.2961341142654419,
"mean": 5.3386003855848685e-05,
"std": 0.03254621848464012,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.165225028991699,
"max": 5.085448741912842,
"mean": -0.014597215689718723,
"std": 1.1575956344604492,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.3449501693248749,
"max": 0.3433416187763214,
"mean": 7.857720629544929e-05,
"std": 0.030061962082982063,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.03606901317834854,
"max": 0.033370036631822586,
"mean": -0.0001412129495292902,
"std": 0.01303885132074356,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.3154986798763275,
"max": 0.37501832842826843,
"mean": -2.0688352378783748e-05,
"std": 0.024059457704424858,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.1054358258843422,
"max": 0.12218254804611206,
"mean": -0.001968180760741234,
"std": 0.02885930798947811,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.3115288317203522,
"max": 1.1208443641662598,
"mean": 0.6663118004798889,
"std": 0.09773967415094376,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.8727405071258545,
"max": 0.6275568604469299,
"mean": 0.001675269566476345,
"std": 0.04743880778551102,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.27153271436691284,
"max": 0.034265656024217606,
"mean": -0.04660956189036369,
"std": 0.04060109704732895,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.9227067232131958,
"max": 0.9646649360656738,
"mean": 0.0010214094072580338,
"std": 0.04070667922496796,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.14452314376831055,
"max": 0.0749678835272789,
"mean": -0.009091369807720184,
"std": 0.025692423805594444,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.2401818335056305,
"max": 0.7130386829376221,
"mean": 0.4472571313381195,
"std": 0.05933048576116562,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.27240708470344543,
"max": 0.2978667914867401,
"mean": 9.335752110928297e-06,
"std": 0.03546963632106781,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.11937365680932999,
"max": 0.11856595426797867,
"mean": 0.0007609212771058083,
"std": 0.027630653232336044,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.2809975743293762,
"max": 0.2798910439014435,
"mean": -7.717408880125731e-05,
"std": 0.03509914502501488,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.5100622177124023,
"max": 2.5220582485198975,
"mean": 0.026752006262540817,
"std": 0.5868890285491943,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.2211453914642334,
"max": 0.2715946435928345,
"mean": 2.9373950383160263e-06,
"std": 0.030732743442058563,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.03357521444559097,
"max": 0.031258679926395416,
"mean": 0.00011264161730650812,
"std": 0.012410733848810196,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.235328808426857,
"max": 0.23169946670532227,
"mean": 5.690910984412767e-05,
"std": 0.025696253404021263,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.1358632743358612,
"max": 0.1274021714925766,
"mean": -0.005497328005731106,
"std": 0.03996951878070831,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.3545131981372833,
"max": 1.172075629234314,
"mean": 0.7106390595436096,
"std": 0.10376753658056259,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.6174606084823608,
"max": 0.5543855428695679,
"mean": 0.0011602300219237804,
"std": 0.04611969366669655,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.18855658173561096,
"max": 0.024964194744825363,
"mean": -0.034842122346162796,
"std": 0.02861381322145462,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.1317338943481445,
"max": 0.9715229272842407,
"mean": 0.00035948510048910975,
"std": 0.04234746843576431,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.5981062650680542,
"max": 0.06280992925167084,
"mean": -0.004879314452409744,
"std": 0.028617065399885178,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.3752063512802124,
"max": 0.940569281578064,
"mean": 0.5925507545471191,
"std": 0.06694991141557693,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.39141029119491577,
"max": 0.3690900504589081,
"mean": 7.122607348719612e-05,
"std": 0.03718871995806694,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11894620954990387,
"max": 0.13650599122047424,
"mean": 0.0009305156418122351,
"std": 0.029250090941786766,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.6192548274993896,
"max": 0.5089151263237,
"mean": 1.523251921753399e-05,
"std": 0.03644222766160965,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.188663482666016,
"max": 8.790773391723633,
"mean": -0.10929425060749054,
"std": 1.6991606950759888,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.27665913105010986,
"max": 0.23989883065223694,
"mean": 5.3170409955782816e-05,
"std": 0.03261546045541763,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.05207620561122894,
"max": 0.039528362452983856,
"mean": 9.136732842307538e-05,
"std": 0.012959755957126617,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.23080551624298096,
"max": 0.23467440903186798,
"mean": -2.1718551579397172e-05,
"std": 0.0293918177485466,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.20433980226516724,
"max": 0.10561156272888184,
"mean": -0.0040257819928228855,
"std": 0.03262433037161827,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.3398168385028839,
"max": 1.0127116441726685,
"mean": 0.7008739709854126,
"std": 0.09675976634025574,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5649558305740356,
"max": 0.8329834342002869,
"mean": 0.00041514058830216527,
"std": 0.04230239987373352,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.21168796718120575,
"max": 0.030586589127779007,
"mean": -0.03219006583094597,
"std": 0.02651149593293667,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7545908689498901,
"max": 0.7186294794082642,
"mean": -9.42062251851894e-06,
"std": 0.036842189729213715,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.26354482769966125,
"max": 0.10587722808122635,
"mean": -0.0030317441560328007,
"std": 0.028866499662399292,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.28444212675094604,
"max": 0.695132315158844,
"mean": 0.49955570697784424,
"std": 0.04653683677315712,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.27924680709838867,
"max": 0.2342948317527771,
"mean": -0.00011125784658361226,
"std": 0.03876316547393799,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.1545136421918869,
"max": 0.12684346735477448,
"mean": -0.002232692204415798,
"std": 0.03341302275657654,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.41413962841033936,
"max": 0.6599588990211487,
"mean": -1.9788125428021885e-05,
"std": 0.03910021111369133,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.238841533660889,
"max": 4.723404884338379,
"mean": -0.02046296000480652,
"std": 1.0078750848770142,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.245038241147995,
"max": 0.20766045153141022,
"mean": 4.384694329928607e-05,
"std": 0.03396622836589813,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.034554872661828995,
"max": 0.04480086266994476,
"mean": -1.7740559997037053e-05,
"std": 0.012627062387764454,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.20076899230480194,
"max": 0.20593972504138947,
"mean": -2.9633309168275446e-05,
"std": 0.031023768708109856,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.1999690979719162,
"max": 0.11344368755817413,
"mean": -0.0029194147791713476,
"std": 0.034512441605329514,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.3670799434185028,
"max": 1.056976079940796,
"mean": 0.67062908411026,
"std": 0.06638980656862259,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.39831405878067017,
"max": 0.5025192499160767,
"mean": -3.858314084936865e-05,
"std": 0.04113723710179329,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.12804804742336273,
"max": 0.026756688952445984,
"mean": -0.030546799302101135,
"std": 0.021871846169233322,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.4490903913974762,
"max": 0.4329609274864197,
"mean": 8.376075129490346e-05,
"std": 0.034896120429039,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.26764214038848877,
"max": 0.07259879261255264,
"mean": -0.0011110607301816344,
"std": 0.023125821724534035,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.28748002648353577,
"max": 0.68532794713974,
"mean": 0.5245869159698486,
"std": 0.047536663711071014,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.22228431701660156,
"max": 0.22351308166980743,
"mean": 1.5719435396022163e-05,
"std": 0.03895285725593567,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.13649071753025055,
"max": 0.10923465341329575,
"mean": 0.00023689989757258445,
"std": 0.029244115576148033,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.37521880865097046,
"max": 0.43729540705680847,
"mean": -9.554900316288695e-06,
"std": 0.03928901627659798,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.8464367389678955,
"max": 5.000250816345215,
"mean": 0.009746391326189041,
"std": 0.8453746438026428,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.22334088385105133,
"max": 0.22010144591331482,
"mean": -2.237738954136148e-07,
"std": 0.03441348671913147,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.04365158826112747,
"max": 0.035844866186380386,
"mean": -0.00025856425054371357,
"std": 0.012080752290785313,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.21325859427452087,
"max": 0.1888350248336792,
"mean": -1.6756794138927944e-05,
"std": 0.03154024854302406,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.1807885617017746,
"max": 0.1208307296037674,
"mean": -0.0024116605054587126,
"std": 0.04126964509487152,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.4224590063095093,
"max": 0.9420249462127686,
"mean": 0.6628004908561707,
"std": 0.05680832266807556,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.371380478143692,
"max": 0.4757322669029236,
"mean": -8.227255602832884e-05,
"std": 0.040896233171224594,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.20817440748214722,
"max": 0.027128340676426888,
"mean": -0.03024515137076378,
"std": 0.021346455439925194,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.34020015597343445,
"max": 0.7336611747741699,
"mean": 8.482092380290851e-05,
"std": 0.03477148711681366,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.2402409464120865,
"max": 0.05044962465763092,
"mean": -0.0011967722093686461,
"std": 0.020463695749640465,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.3060604929924011,
"max": 0.6536474823951721,
"mean": 0.525157630443573,
"std": 0.04612673819065094,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.30424413084983826,
"max": 0.2173623889684677,
"mean": 6.994098657742143e-05,
"std": 0.03949854522943497,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.14945130050182343,
"max": 0.13143886625766754,
"mean": 0.00034817858249880373,
"std": 0.030476493760943413,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.2574465572834015,
"max": 0.20223106443881989,
"mean": 3.098994420724921e-05,
"std": 0.03948768228292465,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.336733102798462,
"max": 2.376356840133667,
"mean": -0.02624763362109661,
"std": 0.44985321164131165,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.18909630179405212,
"max": 0.21054214239120483,
"mean": 3.723270128830336e-05,
"std": 0.034798216074705124,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.03172660619020462,
"max": 0.03550007939338684,
"mean": -0.00020049612794537097,
"std": 0.012289649806916714,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.1884716898202896,
"max": 0.17050357162952423,
"mean": -6.797749665565789e-05,
"std": 0.03217477723956108,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.1394048035144806,
"max": 0.13731525838375092,
"mean": -0.0025170280132442713,
"std": 0.05131148546934128,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.46712788939476013,
"max": 0.9565918445587158,
"mean": 0.6689888834953308,
"std": 0.052790068089962006,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.32436564564704895,
"max": 0.3097445070743561,
"mean": -1.5296809579012915e-06,
"std": 0.04095211252570152,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.12481985241174698,
"max": 0.02530287392437458,
"mean": -0.030714336782693863,
"std": 0.019815392792224884,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.44007495045661926,
"max": 0.44524118304252625,
"mean": 9.531660907668993e-05,
"std": 0.03512417897582054,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.22461570799350739,
"max": 0.05165664851665497,
"mean": -0.0011837758356705308,
"std": 0.018468836322426796,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.33936041593551636,
"max": 0.7393229007720947,
"mean": 0.5587522983551025,
"std": 0.04140261933207512,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.27253732085227966,
"max": 0.2784145176410675,
"mean": 1.9914490621886216e-05,
"std": 0.041062433272600174,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.13705013692378998,
"max": 0.13989973068237305,
"mean": 0.0004888542462140322,
"std": 0.02663799747824669,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.49079182744026184,
"max": 0.35604262351989746,
"mean": 8.881442772690207e-05,
"std": 0.04070043936371803,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.2975404262542725,
"max": 1.7454535961151123,
"mean": -0.021080955862998962,
"std": 0.5002180933952332,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.21756696701049805,
"max": 0.19789846241474152,
"mean": -4.058882768731564e-05,
"std": 0.03423743322491646,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.04133187234401703,
"max": 0.03867634758353233,
"mean": -0.00014505762374028563,
"std": 0.012880876660346985,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.1775415539741516,
"max": 0.18375425040721893,
"mean": 4.7608955355826765e-05,
"std": 0.03156036138534546,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.17991603910923004,
"max": 0.18388697504997253,
"mean": -0.0022191007155925035,
"std": 0.05484011396765709,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.4743064045906067,
"max": 1.0255905389785767,
"mean": 0.6453731656074524,
"std": 0.050350919365882874,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.2718814015388489,
"max": 0.30937331914901733,
"mean": 0.00011242884647799656,
"std": 0.04068846255540848,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.1058216467499733,
"max": 0.026849187910556793,
"mean": -0.029516499489545822,
"std": 0.01792926900088787,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.33906012773513794,
"max": 0.3292734920978546,
"mean": 5.717227759305388e-05,
"std": 0.034418120980262756,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.18169447779655457,
"max": 0.04204929992556572,
"mean": -0.0010728895431384444,
"std": 0.01721538044512272,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.32545599341392517,
"max": 0.686664342880249,
"mean": 0.5112766027450562,
"std": 0.036954235285520554,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.23384402692317963,
"max": 0.22573164105415344,
"mean": -3.598508192226291e-05,
"std": 0.0391816683113575,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.11534573137760162,
"max": 0.13162653148174286,
"mean": 0.0001513269089628011,
"std": 0.029193254187703133,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.3528960049152374,
"max": 0.285469651222229,
"mean": 7.2757711677695625e-06,
"std": 0.03925016149878502,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.133274078369141,
"max": 3.544353723526001,
"mean": -0.011593645438551903,
"std": 0.6827419400215149,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.21140114963054657,
"max": 0.20909518003463745,
"mean": 3.4737786336336285e-05,
"std": 0.0344894602894783,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.035711076110601425,
"max": 0.048078615218400955,
"mean": 0.0007944396347738802,
"std": 0.01285555586218834,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.21062366664409637,
"max": 0.193213552236557,
"mean": -1.284678091906244e-06,
"std": 0.031699951738119125,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.18667221069335938,
"max": 0.17721369862556458,
"mean": -0.002848550211638212,
"std": 0.058637380599975586,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.4746397137641907,
"max": 1.041860818862915,
"mean": 0.651482880115509,
"std": 0.049657855182886124,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.24850571155548096,
"max": 0.32913738489151,
"mean": 0.00018063749303109944,
"std": 0.04057687148451805,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.12447232753038406,
"max": 0.024594351649284363,
"mean": -0.030502719804644585,
"std": 0.01760093867778778,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.4212387502193451,
"max": 0.482032835483551,
"mean": 2.141768618457718e-06,
"std": 0.03540309891104698,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.15185940265655518,
"max": 0.04337269812822342,
"mean": 3.945987918996252e-05,
"std": 0.014877513982355595,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.31561797857284546,
"max": 0.682021975517273,
"mean": 0.5529669523239136,
"std": 0.04071478173136711,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.20643697679042816,
"max": 0.21993368864059448,
"mean": 3.0923340091248974e-05,
"std": 0.03830339014530182,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.1378619521856308,
"max": 0.112775057554245,
"mean": 2.049036993412301e-05,
"std": 0.02582140639424324,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.40277066826820374,
"max": 0.3711613118648529,
"mean": 2.6232244636048563e-05,
"std": 0.038185227662324905,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.7714638710021973,
"max": 2.8691656589508057,
"mean": 0.0011571794748306274,
"std": 0.516919732093811,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.20294280350208282,
"max": 0.1974332332611084,
"mean": 2.9497665309463628e-05,
"std": 0.03430052474141121,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.050981007516384125,
"max": 0.04004063457250595,
"mean": -0.0004196166410110891,
"std": 0.013425874523818493,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.19651710987091064,
"max": 0.2017611861228943,
"mean": -1.2331822290434502e-05,
"std": 0.031808242201805115,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.19314663112163544,
"max": 0.19513675570487976,
"mean": -0.0029698254074901342,
"std": 0.06256996840238571,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.3494587540626526,
"max": 1.0840725898742676,
"mean": 0.6672499775886536,
"std": 0.05523226782679558,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22516681253910065,
"max": 0.2514885663986206,
"mean": 0.00035906361881643534,
"std": 0.040765900164842606,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.09103509038686752,
"max": 0.04371785372495651,
"mean": -0.030089743435382843,
"std": 0.017607875168323517,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.35351207852363586,
"max": 0.30409130454063416,
"mean": -4.350150265963748e-05,
"std": 0.03712816908955574,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.16206228733062744,
"max": 0.06353683769702911,
"mean": -8.305630763061345e-05,
"std": 0.019406888633966446,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.348787397146225,
"max": 0.722071647644043,
"mean": 0.5424383878707886,
"std": 0.039067838340997696,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.21942198276519775,
"max": 0.22312530875205994,
"mean": -1.1118878319393843e-05,
"std": 0.03923613205552101,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.11845415830612183,
"max": 0.1708553582429886,
"mean": 0.0002840349334292114,
"std": 0.025122985243797302,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.24687451124191284,
"max": 0.301123708486557,
"mean": -3.652745726867579e-05,
"std": 0.038935691118240356,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.5055902004241943,
"max": 3.715036153793335,
"mean": 0.01585160195827484,
"std": 0.7825287580490112,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.2186805158853531,
"max": 0.23763009905815125,
"mean": -1.3581981875177007e-05,
"std": 0.036307912319898605,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.047199200838804245,
"max": 0.05141306668519974,
"mean": 0.0004809980746358633,
"std": 0.013516527600586414,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.21401917934417725,
"max": 0.21761927008628845,
"mean": 5.652284016832709e-05,
"std": 0.03361988440155983,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.21142059564590454,
"max": 0.23152688145637512,
"mean": -0.005106795579195023,
"std": 0.061881836503744125,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.36215895414352417,
"max": 1.1013121604919434,
"mean": 0.6993671655654907,
"std": 0.05360371619462967,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.2346053123474121,
"max": 0.24489951133728027,
"mean": 0.000463481672341004,
"std": 0.0412747748196125,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.09809241443872452,
"max": 0.06830352544784546,
"mean": -0.031439535319805145,
"std": 0.01812061481177807,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.3016868829727173,
"max": 0.35154613852500916,
"mean": -8.162677113432437e-05,
"std": 0.040280576795339584,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.15234576165676117,
"max": 0.14968463778495789,
"mean": 0.00025512842694297433,
"std": 0.023036863654851913,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 0.99940425157547,
"max": 1.0017729997634888,
"mean": 1.0002546310424805,
"std": 0.0006659556529484689,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.03126639127731323,
"max": 0.03126263990998268,
"mean": -1.9294351659482345e-05,
"std": 0.018044061958789825,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.031232889741659164,
"max": 0.03099249303340912,
"mean": -0.001084338640794158,
"std": 0.017953665927052498,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.031263306736946106,
"max": 0.031267084181308746,
"mean": 3.548892891558353e-06,
"std": 0.018044468015432358,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.03115880861878395,
"max": 0.031179169192910194,
"mean": 0.0003339822869747877,
"std": 0.018065886572003365,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 0.9994449615478516,
"max": 1.0018095970153809,
"mean": 1.0002632141113281,
"std": 0.0006522060139104724,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.03126963973045349,
"max": 0.03127080947160721,
"mean": -8.397149031225126e-06,
"std": 0.01804318279027939,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.0312512069940567,
"max": 0.031249327585101128,
"mean": 0.0001536280324216932,
"std": 0.01799430511891842,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.38297948241233826,
"max": 0.7195636034011841,
"mean": 0.5807591080665588,
"std": 0.03886506333947182,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.2380739152431488,
"max": 0.19658486545085907,
"mean": 2.6584548322716728e-05,
"std": 0.03746968135237694,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.11867813766002655,
"max": 0.16608171164989471,
"mean": 0.0009910191874951124,
"std": 0.02755763940513134,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.2461400330066681,
"max": 0.5007420182228088,
"mean": -5.0447401008568704e-05,
"std": 0.03762757405638695,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.9424328804016113,
"max": 3.7695746421813965,
"mean": -0.0035724048502743244,
"std": 0.681464672088623,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.22735856473445892,
"max": 0.2514454424381256,
"mean": -1.1598500350373797e-05,
"std": 0.03743908926844597,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.07171762734651566,
"max": 0.08069814741611481,
"mean": -0.0005200206069275737,
"std": 0.015662606805562973,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.2281658798456192,
"max": 0.2580048441886902,
"mean": -2.8616894269362092e-05,
"std": 0.03542575612664223,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.20041774213314056,
"max": 0.2152491807937622,
"mean": -0.005537157878279686,
"std": 0.06833865493535995,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.4051746428012848,
"max": 1.1894384622573853,
"mean": 0.7380443215370178,
"std": 0.05523209273815155,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.2211739420890808,
"max": 0.2460654377937317,
"mean": 0.0005211163079366088,
"std": 0.04134252667427063,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.10346150398254395,
"max": 0.024183176457881927,
"mean": -0.03266960382461548,
"std": 0.018883610144257545,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.4493599832057953,
"max": 0.42234691977500916,
"mean": -0.0004324695619288832,
"std": 0.046903885900974274,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.25148940086364746,
"max": 0.47015321254730225,
"mean": 0.0031974762678146362,
"std": 0.044545728713274,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.3170487582683563,
"max": 0.33324581384658813,
"mean": -2.528912045818288e-05,
"std": 0.0212908573448658,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.3246053457260132,
"max": 0.6854332685470581,
"mean": 0.5710639357566833,
"std": 0.04471997916698456,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.16466176509857178,
"max": 0.1740393489599228,
"mean": -4.8587571654934436e-05,
"std": 0.03318466991186142,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.18687166273593903,
"max": 0.14292190968990326,
"mean": 3.81053687306121e-05,
"std": 0.029696526005864143,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.38059744238853455,
"max": 0.24608764052391052,
"mean": -9.966568541130982e-06,
"std": 0.032765913754701614,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.65606689453125,
"max": 3.290353775024414,
"mean": -0.014253877103328705,
"std": 0.9852582216262817,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.23507949709892273,
"max": 0.2475711703300476,
"mean": -1.77873171196552e-05,
"std": 0.041702862828969955,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.07279693335294724,
"max": 0.15454502403736115,
"mean": 0.000664762279484421,
"std": 0.025170980021357536,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.2665710747241974,
"max": 0.24850338697433472,
"mean": -1.535093724669423e-05,
"std": 0.04014323651790619,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.18962323665618896,
"max": 0.19475142657756805,
"mean": -0.0012306260177865624,
"std": 0.06669402867555618,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.3292522728443146,
"max": 0.999567985534668,
"mean": 0.7192600965499878,
"std": 0.052342262119054794,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.23160076141357422,
"max": 0.2457643449306488,
"mean": 0.00018272445595357567,
"std": 0.04090625420212746,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11420896649360657,
"max": 0.018650474026799202,
"mean": -0.042482297867536545,
"std": 0.018855074420571327,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.3899804949760437,
"max": 0.40730100870132446,
"mean": -2.1874793674214743e-05,
"std": 0.04854067414999008,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.6932750344276428,
"max": 0.41266557574272156,
"mean": 0.0008518121903762221,
"std": 0.060295384377241135,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": 0.0,
"max": 0.999998927116394,
"mean": 0.00048828075523488224,
"std": 0.02209167368710041,
"sparsity": 0.99951171875,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 0.9994292855262756,
"max": 1.0017839670181274,
"mean": 1.000253677368164,
"std": 0.000652652932330966,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.03126111254096031,
"max": 0.0312650129199028,
"mean": -2.1023370209150016e-05,
"std": 0.0180354006588459,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.031219881027936935,
"max": 0.031236713752150536,
"mean": -0.0006771213375031948,
"std": 0.017829909920692444,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.03126417100429535,
"max": 0.03126959502696991,
"mean": -8.832794264890254e-06,
"std": 0.018034426495432854,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.03123662993311882,
"max": 0.03124932385981083,
"mean": -0.0007298794225789607,
"std": 0.01794484816491604,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 0.9993973970413208,
"max": 1.0017794370651245,
"mean": 1.00028395652771,
"std": 0.0006690355949103832,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.03126800060272217,
"max": 0.031265586614608765,
"mean": 3.591585482354276e-06,
"std": 0.018040791153907776,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.031230367720127106,
"max": 0.03125299513339996,
"mean": 0.00019574598991312087,
"std": 0.018076494336128235,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.23457114398479462,
"max": 0.2725405693054199,
"mean": 6.967699391680071e-06,
"std": 0.01881221868097782,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.3212726414203644,
"max": 0.6936339139938354,
"mean": 0.5816882848739624,
"std": 0.04593805596232414,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.18182046711444855,
"max": 0.1976739764213562,
"mean": -1.1725308468157891e-05,
"std": 0.033187251538038254,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.1606890708208084,
"max": 0.12948599457740784,
"mean": -0.001067878445610404,
"std": 0.034144606441259384,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.332189679145813,
"max": 0.31144458055496216,
"mean": -1.0352114259148948e-05,
"std": 0.03223797678947449,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.803721904754639,
"max": 8.76359748840332,
"mean": 0.09347224235534668,
"std": 1.6197657585144043,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.23381681740283966,
"max": 0.2420002520084381,
"mean": 4.138463191338815e-05,
"std": 0.04086202755570412,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.07600986212491989,
"max": 0.06578930467367172,
"mean": 0.00047852861462160945,
"std": 0.019416049122810364,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.24590720236301422,
"max": 0.23409155011177063,
"mean": -2.9138864192645997e-06,
"std": 0.039436690509319305,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.16287560760974884,
"max": 0.16082623600959778,
"mean": 0.0016318459529429674,
"std": 0.06528104841709137,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.5568646192550659,
"max": 0.9439972043037415,
"mean": 0.7129673957824707,
"std": 0.0401376374065876,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.22865070402622223,
"max": 0.25514620542526245,
"mean": -4.54368710052222e-05,
"std": 0.04058137908577919,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.13480910658836365,
"max": 0.022281890735030174,
"mean": -0.04135727509856224,
"std": 0.018383679911494255,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.42169636487960815,
"max": 0.39239397644996643,
"mean": -4.40980693383608e-06,
"std": 0.04779108986258507,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.6075002551078796,
"max": 0.6514228582382202,
"mean": 0.0015837398823350668,
"std": 0.05683837831020355,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.25171443819999695,
"max": 0.32070818543434143,
"mean": -6.0755610320484266e-06,
"std": 0.01961563341319561,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.3600234091281891,
"max": 0.6823956370353699,
"mean": 0.5707757472991943,
"std": 0.04296165704727173,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.22057192027568817,
"max": 0.1770636886358261,
"mean": -3.4672062611207366e-05,
"std": 0.03430239111185074,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.16365490853786469,
"max": 0.23306845128536224,
"mean": 0.0003636471228674054,
"std": 0.03286948427557945,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.2637504041194916,
"max": 0.23983356356620789,
"mean": -5.237644290900789e-05,
"std": 0.03390154615044594,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.8552327156066895,
"max": 5.091460227966309,
"mean": 0.04388175159692764,
"std": 1.2293211221694946,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.2467021644115448,
"max": 0.2504825294017792,
"mean": 7.218097016448155e-05,
"std": 0.04399321228265762,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.06264208257198334,
"max": 0.054531484842300415,
"mean": 0.00065071159042418,
"std": 0.017192156985402107,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.2865971624851227,
"max": 0.2718464434146881,
"mean": -4.9919544835574925e-05,
"std": 0.04299159720540047,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.16066378355026245,
"max": 0.17053070664405823,
"mean": -0.0028841430321335793,
"std": 0.059287648648023605,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.5196661353111267,
"max": 0.9328836798667908,
"mean": 0.7135858535766602,
"std": 0.038419246673583984,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.2381887435913086,
"max": 0.24951320886611938,
"mean": 0.00046486116480082273,
"std": 0.04046149179339409,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.14427022635936737,
"max": 0.041461389511823654,
"mean": -0.03969397395849228,
"std": 0.02054336480796337,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.5328505039215088,
"max": 0.5830832719802856,
"mean": 5.9098410929436795e-06,
"std": 0.04886835068464279,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5191918015480042,
"max": 0.49353325366973877,
"mean": 0.0023602654691785574,
"std": 0.05344703048467636,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.2736090123653412,
"max": 0.31520769000053406,
"mean": 1.8358268789597787e-06,
"std": 0.020052799955010414,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.36640509963035583,
"max": 0.711678147315979,
"mean": 0.593246340751648,
"std": 0.04593454673886299,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.21106205880641937,
"max": 0.1996321678161621,
"mean": 3.077441579080187e-05,
"std": 0.03486856073141098,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.18727192282676697,
"max": 0.20402666926383972,
"mean": 0.0009561080951243639,
"std": 0.031529128551483154,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.28969451785087585,
"max": 0.3398367166519165,
"mean": -4.7392662963829935e-05,
"std": 0.03458969667553902,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.877439260482788,
"max": 3.3875346183776855,
"mean": 0.014458965510129929,
"std": 0.8584734797477722,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.2244323492050171,
"max": 0.24988871812820435,
"mean": -3.996262876171386e-06,
"std": 0.04223586246371269,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.055074166506528854,
"max": 0.0468442440032959,
"mean": -1.8697581253945827e-05,
"std": 0.015848318114876747,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.2929523289203644,
"max": 0.29100877046585083,
"mean": -7.363702025031671e-06,
"std": 0.04195086285471916,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.12487897276878357,
"max": 0.2594272792339325,
"mean": -0.003234811592847109,
"std": 0.05315796285867691,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.45620009303092957,
"max": 0.844541609287262,
"mean": 0.7056601047515869,
"std": 0.035222552716732025,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.5114080309867859,
"max": 0.34850868582725525,
"mean": 0.00034260982647538185,
"std": 0.040206458419561386,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.18708936870098114,
"max": 0.03951717168092728,
"mean": -0.03939085826277733,
"std": 0.02134866826236248,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.544402539730072,
"max": 0.5565053224563599,
"mean": -7.180786633398384e-05,
"std": 0.05074291676282883,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.511856734752655,
"max": 0.6643833518028259,
"mean": 0.002446281723678112,
"std": 0.04952690377831459,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.3324280381202698,
"max": 0.2657060921192169,
"mean": 3.681749149109237e-06,
"std": 0.01939038746058941,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.32228395342826843,
"max": 0.76633620262146,
"mean": 0.6510899662971497,
"std": 0.04530107229948044,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.2495409995317459,
"max": 0.21955986320972443,
"mean": -2.516008862585295e-06,
"std": 0.03650251030921936,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.3271917402744293,
"max": 0.2873159945011139,
"mean": -0.0006787859019823372,
"std": 0.03855893388390541,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.3100964426994324,
"max": 0.3699168264865875,
"mean": 6.482247408712283e-05,
"std": 0.036243122071027756,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.71769905090332,
"max": 5.807940483093262,
"mean": 0.03796037286520004,
"std": 1.4132623672485352,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.22175297141075134,
"max": 0.20589375495910645,
"mean": -7.500311767216772e-05,
"std": 0.04249146580696106,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.07759421318769455,
"max": 0.05135132744908333,
"mean": -0.000925259490031749,
"std": 0.016409944742918015,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.33092743158340454,
"max": 0.3291303813457489,
"mean": -4.938564870826667e-06,
"std": 0.04279821738600731,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.28501445055007935,
"max": 0.11160922050476074,
"mean": -0.0012059551663696766,
"std": 0.047013018280267715,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.4864731431007385,
"max": 0.8868119716644287,
"mean": 0.7375612854957581,
"std": 0.03823444992303848,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.36123231053352356,
"max": 0.2742029130458832,
"mean": 5.119089109939523e-05,
"std": 0.04065319895744324,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.24762794375419617,
"max": 0.046543918550014496,
"mean": -0.03927048668265343,
"std": 0.023254919797182083,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.6263269186019897,
"max": 0.5970423817634583,
"mean": -6.188904080772772e-05,
"std": 0.05312599986791611,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.709787905216217,
"max": 0.2658335268497467,
"mean": 0.0009195120073854923,
"std": 0.051235005259513855,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.34334826469421387,
"max": 0.30343398451805115,
"mean": 2.1822438611707184e-07,
"std": 0.019139666110277176,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.34997785091400146,
"max": 0.7828695178031921,
"mean": 0.6389003992080688,
"std": 0.049218229949474335,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.2058519572019577,
"max": 0.20681944489479065,
"mean": -5.9934332966804504e-05,
"std": 0.037698548287153244,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.2586883008480072,
"max": 0.26840776205062866,
"mean": -0.0004055192694067955,
"std": 0.044631343334913254,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.3542138934135437,
"max": 0.32258859276771545,
"mean": -7.339326657529455e-06,
"std": 0.037206824868917465,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.261901378631592,
"max": 4.204929351806641,
"mean": -0.02642371505498886,
"std": 1.0068365335464478,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.2388344258069992,
"max": 0.24378669261932373,
"mean": -2.555117680458352e-05,
"std": 0.0432158038020134,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.06242268532514572,
"max": 0.0566251203417778,
"mean": 0.00035173987271264195,
"std": 0.01414910051971674,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.43747568130493164,
"max": 0.3737330734729767,
"mean": 1.4612624909204897e-05,
"std": 0.04412786290049553,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.0961233526468277,
"max": 0.1762983798980713,
"mean": -0.000659514800645411,
"std": 0.03514162451028824,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.42177778482437134,
"max": 1.0692633390426636,
"mean": 0.7485724687576294,
"std": 0.04206255078315735,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.2659589648246765,
"max": 0.29692542552948,
"mean": -7.890580309322104e-05,
"std": 0.040813855826854706,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.18484872579574585,
"max": 0.04314016178250313,
"mean": -0.03681201860308647,
"std": 0.02558443695306778,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.457691490650177,
"max": 0.4868350028991699,
"mean": 4.39733594248537e-05,
"std": 0.0542210191488266,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.2863001823425293,
"max": 0.5517781972885132,
"mean": -0.0008814089233055711,
"std": 0.047833118587732315,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.29263076186180115,
"max": 0.32270461320877075,
"mean": 6.018684871378355e-06,
"std": 0.019972756505012512,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.2913359999656677,
"max": 0.7601139545440674,
"mean": 0.6508511304855347,
"std": 0.052110809832811356,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.24366426467895508,
"max": 0.26166871190071106,
"mean": -5.6619760471221525e-06,
"std": 0.039614126086235046,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.26755285263061523,
"max": 0.20015348494052887,
"mean": -0.0008774641901254654,
"std": 0.05177554860711098,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.2722264528274536,
"max": 0.2537742853164673,
"mean": 5.269570010568714e-06,
"std": 0.038710836321115494,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -12.966026306152344,
"max": 15.947823524475098,
"mean": 0.0332300066947937,
"std": 1.989342451095581,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.20672431588172913,
"max": 0.22581705451011658,
"mean": -7.253723015310243e-05,
"std": 0.04055880755186081,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.06943444162607193,
"max": 0.06314389407634735,
"mean": 0.00015862843429204077,
"std": 0.0147479847073555,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.46546468138694763,
"max": 0.32013440132141113,
"mean": 1.955418883881066e-05,
"std": 0.04059435427188873,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.06408563256263733,
"max": 0.11556272953748703,
"mean": 0.0011989418417215347,
"std": 0.02470807358622551,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.3750011920928955,
"max": 0.9319288730621338,
"mean": 0.7511273622512817,
"std": 0.04018896445631981,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.27909016609191895,
"max": 0.27321043610572815,
"mean": -0.00016836788563523442,
"std": 0.04100494086742401,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.19844156503677368,
"max": 0.051351871341466904,
"mean": -0.032028019428253174,
"std": 0.025079041719436646,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.6585158705711365,
"max": 0.5356709957122803,
"mean": -5.047450395068154e-05,
"std": 0.05285719037055969,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.1926739513874054,
"max": 0.5822402238845825,
"mean": -0.0005105392774567008,
"std": 0.04108486697077751,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.41757693886756897,
"max": 0.37195414304733276,
"mean": 6.520090209960472e-06,
"std": 0.021627968177199364,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.21454279124736786,
"max": 0.746727705001831,
"mean": 0.6494921445846558,
"std": 0.05432972311973572,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.20945341885089874,
"max": 0.19550970196723938,
"mean": 4.009851181763224e-05,
"std": 0.03945960849523544,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.32960787415504456,
"max": 0.25966984033584595,
"mean": -0.003232899820432067,
"std": 0.056286394596099854,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.20589140057563782,
"max": 0.25466933846473694,
"mean": 5.40036016900558e-05,
"std": 0.03856228291988373,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.243993759155273,
"max": 6.932845115661621,
"mean": 0.048340775072574615,
"std": 1.3851999044418335,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.20977123081684113,
"max": 0.23046547174453735,
"mean": -4.7887324399198405e-06,
"std": 0.041317813098430634,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.043830934911966324,
"max": 0.0359884537756443,
"mean": -6.7679648054763675e-06,
"std": 0.012799433432519436,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.3975262939929962,
"max": 0.34497249126434326,
"mean": -5.5380802223226056e-05,
"std": 0.04239468649029732,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.055168669670820236,
"max": 0.06281793117523193,
"mean": 0.0003579839540179819,
"std": 0.018675317987799644,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.35081058740615845,
"max": 1.0451138019561768,
"mean": 0.7896714210510254,
"std": 0.04873151332139969,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.333694726228714,
"max": 0.38623932003974915,
"mean": -0.00016907340614125133,
"std": 0.04149046167731285,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.15751884877681732,
"max": 0.05906709283590317,
"mean": -0.03182389587163925,
"std": 0.0251007080078125,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6963667273521423,
"max": 0.46923714876174927,
"mean": -8.512083149980754e-05,
"std": 0.05180640146136284,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.24786238372325897,
"max": 0.3288760185241699,
"mean": -0.00026252405950799584,
"std": 0.04145393148064613,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.28698989748954773,
"max": 0.350361168384552,
"mean": -2.7725566269509727e-06,
"std": 0.02424115315079689,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.19679424166679382,
"max": 0.7790785431861877,
"mean": 0.6702431440353394,
"std": 0.05866772681474686,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.22908955812454224,
"max": 0.23140233755111694,
"mean": -2.085999039991293e-05,
"std": 0.04043996334075928,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.22004202008247375,
"max": 0.24097159504890442,
"mean": 0.0007790824165567756,
"std": 0.055850621312856674,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.2167646586894989,
"max": 0.226406067609787,
"mean": -7.223833381431177e-05,
"std": 0.039374157786369324,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.906242370605469,
"max": 9.069114685058594,
"mean": -0.0012542838230729103,
"std": 1.8484386205673218,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.26939529180526733,
"max": 0.258998304605484,
"mean": 4.3638072384055704e-05,
"std": 0.0384107306599617,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.0579773373901844,
"max": 0.057985395193099976,
"mean": 0.0003543748171068728,
"std": 0.01471623033285141,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.26387640833854675,
"max": 0.28812822699546814,
"mean": -6.169013795442879e-05,
"std": 0.0390775129199028,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.04410848394036293,
"max": 0.03735562041401863,
"mean": -9.80982295004651e-05,
"std": 0.013347214087843895,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.33935481309890747,
"max": 1.0925333499908447,
"mean": 0.8639740347862244,
"std": 0.06387708336114883,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.42313116788864136,
"max": 0.41907814145088196,
"mean": 0.0003136416198685765,
"std": 0.04351295530796051,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.21479536592960358,
"max": 0.17072512209415436,
"mean": -0.029444200918078423,
"std": 0.0318748876452446,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.5986181497573853,
"max": 0.5598904490470886,
"mean": -0.00014800383360125124,
"std": 0.05346141383051872,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17892269790172577,
"max": 0.37738052010536194,
"mean": 0.0013508039992302656,
"std": 0.03731485456228256,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.39432692527770996,
"max": 0.36881834268569946,
"mean": 3.763254062505439e-05,
"std": 0.028617430478334427,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.2906792163848877,
"max": 0.8274716138839722,
"mean": 0.7055441737174988,
"std": 0.06783536076545715,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.9265903830528259,
"max": 1.027007818222046,
"mean": -2.7936879632761702e-05,
"std": 0.04764379560947418,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.8793500661849976,
"max": 0.8158687949180603,
"mean": -0.0002950741327367723,
"std": 0.09555269032716751,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.27022066712379456,
"max": 0.24093179404735565,
"mean": -2.251441401313059e-05,
"std": 0.0389498770236969,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.743555068969727,
"max": 22.852014541625977,
"mean": -0.09188339114189148,
"std": 4.070625305175781,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.22778554260730743,
"max": 0.24572508037090302,
"mean": -2.547786607465241e-05,
"std": 0.03864147141575813,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.06017241254448891,
"max": 0.045427631586790085,
"mean": -0.00013617021613754332,
"std": 0.014690100215375423,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.3379840552806854,
"max": 0.3750169575214386,
"mean": 7.478654879378155e-06,
"std": 0.040820397436618805,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.04619982838630676,
"max": 0.19537773728370667,
"mean": 0.0002735886082518846,
"std": 0.013551585376262665,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.37374061346054077,
"max": 1.1302894353866577,
"mean": 0.8902378082275391,
"std": 0.0640074834227562,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.4474950134754181,
"max": 0.542551577091217,
"mean": 2.5157038180623204e-05,
"std": 0.0455806665122509,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.2237873524427414,
"max": 0.08737614750862122,
"mean": -0.03201454132795334,
"std": 0.03774423152208328,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7263057827949524,
"max": 0.6888318657875061,
"mean": 3.633538290159777e-05,
"std": 0.0517943874001503,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.1743825227022171,
"max": 0.21823401749134064,
"mean": 3.549834946170449e-05,
"std": 0.031774841248989105,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.3394811451435089,
"max": 0.37303876876831055,
"mean": 4.305133916204795e-05,
"std": 0.034135352820158005,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.31772536039352417,
"max": 1.2872265577316284,
"mean": 0.6015347242355347,
"std": 0.08348645269870758,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.2831268906593323,
"max": 0.26034945249557495,
"mean": -3.016911477971007e-06,
"std": 0.03598069027066231,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.23578572273254395,
"max": 0.20580488443374634,
"mean": 0.00023967580636963248,
"std": 0.056039854884147644,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.43542858958244324,
"max": 0.32475200295448303,
"mean": 2.4229491828009486e-05,
"std": 0.034124139696359634,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.546597957611084,
"max": 7.314022064208984,
"mean": -0.007369913160800934,
"std": 0.6993920803070068,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.34419700503349304,
"max": 0.36281776428222656,
"mean": 0.00010317970009054989,
"std": 0.04783639311790466,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.07377609610557556,
"max": 0.06036657840013504,
"mean": 0.0009365753503516316,
"std": 0.014937076717615128,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.2563660442829132,
"max": 0.28687092661857605,
"mean": 4.898875886283349e-06,
"std": 0.04156457632780075,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.055319979786872864,
"max": 0.06281081587076187,
"mean": 0.000127265666378662,
"std": 0.007150812540203333,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.4940038025379181,
"max": 1.220664620399475,
"mean": 1.0135600566864014,
"std": 0.11748378723859787,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.0940163135528564,
"max": 1.0475441217422485,
"mean": -4.872599311056547e-05,
"std": 0.05241787061095238,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.2236405611038208,
"max": 0.1730623096227646,
"mean": -0.027228206396102905,
"std": 0.0363101065158844,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8842402696609497,
"max": 0.9227275252342224,
"mean": -0.00014601278235204518,
"std": 0.05329864099621773,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.1710553914308548,
"max": 0.3796318471431732,
"mean": 0.0033668535761535168,
"std": 0.03987643122673035,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.7777752876281738,
"max": 0.722641110420227,
"mean": 1.80296028702287e-05,
"std": 0.0461542084813118,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.3386844992637634,
"max": 1.4281909465789795,
"mean": 0.9485001564025879,
"std": 0.20679982006549835,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.745824933052063,
"max": 1.7045180797576904,
"mean": 0.0002270373224746436,
"std": 0.15870553255081177,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.1994949579238892,
"max": 1.1009647846221924,
"mean": -0.009547820314764977,
"std": 0.20390011370182037,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.4209446907043457,
"max": 0.42817720770835876,
"mean": 6.392307841451839e-05,
"std": 0.04802021011710167,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.74793243408203,
"max": 19.543048858642578,
"mean": -0.2483428716659546,
"std": 4.7770676612854,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.32391926646232605,
"max": 0.438634991645813,
"mean": -1.1790625649155118e-05,
"std": 0.04616706818342209,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.03377115726470947,
"max": 0.03684735298156738,
"mean": 0.0006395116215571761,
"std": 0.012911375612020493,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.7035614252090454,
"max": 0.6690102815628052,
"mean": 4.2652536649256945e-05,
"std": 0.0578920915722847,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.07234025001525879,
"max": 0.06776763498783112,
"mean": -0.00013464699441101402,
"std": 0.012891847640275955,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.38047194480895996,
"max": 1.39299738407135,
"mean": 1.06674325466156,
"std": 0.2197609543800354,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.6165490746498108,
"max": 0.7185496091842651,
"mean": 0.00011303066276013851,
"std": 0.05802777782082558,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.21881279349327087,
"max": 0.22498759627342224,
"mean": 0.00618295231834054,
"std": 0.04969846084713936,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.629830002784729,
"max": 0.8896750807762146,
"mean": 1.2404842891555745e-05,
"std": 0.023545295000076294,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.5068444013595581,
"max": 0.47373077273368835,
"mean": -0.0030198940075933933,
"std": 0.06924331188201904,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.5382840037345886,
"max": 1.1801176071166992,
"mean": 0.7828130722045898,
"std": 0.09876110404729843,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.267057865858078,
"max": 0.212993323802948,
"mean": -0.0002232328843092546,
"std": 0.054005783051252365,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23836649954319,
"max": 0.014864332042634487,
"mean": -0.043917927891016006,
"std": 0.03428623452782631,
"sparsity": 0.0,
"shape": [
100
]
}
}
}