rda6 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
74457c3 verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.4310249388217926,
"max": 0.29892200231552124,
"mean": -0.0025504794903099537,
"std": 0.0425548329949379,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.06312082707881927,
"max": 0.10854886472225189,
"mean": 0.000634247378911823,
"std": 0.03414047509431839,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.4126858711242676,
"max": 0.8365619778633118,
"mean": -0.00020620696886908263,
"std": 0.02410798706114292,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.1163593977689743,
"max": 0.32443463802337646,
"mean": -0.0009363778517581522,
"std": 0.019653797149658203,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.8154137134552,
"max": 2.8935482501983643,
"mean": -0.0003568639513105154,
"std": 0.6153793334960938,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.2813769578933716,
"max": 0.38245514035224915,
"mean": 0.00042411635513417423,
"std": 0.04274803400039673,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.22421328723430634,
"max": 0.21138469874858856,
"mean": -0.004506870172917843,
"std": 0.04105628281831741,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.4279993176460266,
"max": 0.47548574209213257,
"mean": 4.261187768861419e-06,
"std": 0.02450713701546192,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.327997088432312,
"max": 0.15884317457675934,
"mean": -0.04679153859615326,
"std": 0.05176762491464615,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.4111199676990509,
"max": 0.35511136054992676,
"mean": -0.00012967045768164098,
"std": 0.02359858900308609,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.23166728019714355,
"max": 0.26478779315948486,
"mean": -0.029217107221484184,
"std": 0.0495423898100853,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.2546941041946411,
"max": 0.8268164992332458,
"mean": 0.5258853435516357,
"std": 0.08176200091838837,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.29768767952919006,
"max": 0.26705101132392883,
"mean": -0.00042415110510773957,
"std": 0.03210066258907318,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.09323342144489288,
"max": 0.12589719891548157,
"mean": 0.0006516888970509171,
"std": 0.02578314207494259,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.2915492653846741,
"max": 0.2830723226070404,
"mean": -7.510973955504596e-05,
"std": 0.03093201108276844,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.933852195739746,
"max": 5.848132610321045,
"mean": -0.009441309608519077,
"std": 1.2997525930404663,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.4259975850582123,
"max": 0.34512922167778015,
"mean": 9.808027243707329e-05,
"std": 0.029951922595500946,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.028870832175016403,
"max": 0.027608035132288933,
"mean": -0.0003159761254210025,
"std": 0.012566526420414448,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.4554309844970703,
"max": 0.44925424456596375,
"mean": 2.2834456103737466e-05,
"std": 0.023853331804275513,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.08927308022975922,
"max": 0.09165928512811661,
"mean": 0.002274596830829978,
"std": 0.019546369090676308,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.26676347851753235,
"max": 1.06475031375885,
"mean": 0.5317091345787048,
"std": 0.1056147962808609,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.5758013129234314,
"max": 0.60973060131073,
"mean": -0.00043392262887209654,
"std": 0.03859521821141243,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.18311595916748047,
"max": 0.045692577958106995,
"mean": -0.02953081764280796,
"std": 0.04277201369404793,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.169153094291687,
"max": 1.6363517045974731,
"mean": 0.00031960621709004045,
"std": 0.027692886069417,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.16331635415554047,
"max": 0.20692557096481323,
"mean": -0.02113202027976513,
"std": 0.0279996357858181,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.22424264252185822,
"max": 0.8506074547767639,
"mean": 0.487909197807312,
"std": 0.0759621262550354,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.25719332695007324,
"max": 0.3069766163825989,
"mean": -8.219409210141748e-06,
"std": 0.033469025045633316,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.0958663746714592,
"max": 0.1111140251159668,
"mean": 6.868487980682403e-05,
"std": 0.02699616365134716,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.2987782061100006,
"max": 0.2982846796512604,
"mean": 5.100301495986059e-05,
"std": 0.03253886476159096,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.194380760192871,
"max": 5.11414098739624,
"mean": -0.01477175671607256,
"std": 1.1622190475463867,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.3454170525074005,
"max": 0.3440503478050232,
"mean": 7.885548257036135e-05,
"std": 0.03005816601216793,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.036366600543260574,
"max": 0.033365145325660706,
"mean": -0.00014353547885548323,
"std": 0.013023492880165577,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.3166007697582245,
"max": 0.37669771909713745,
"mean": -2.1011579519836232e-05,
"std": 0.024054987356066704,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.10603390634059906,
"max": 0.12274863570928574,
"mean": -0.0019654321949929,
"std": 0.028894905000925064,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.311918169260025,
"max": 1.1306103467941284,
"mean": 0.666860818862915,
"std": 0.0989983081817627,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.8729648590087891,
"max": 0.6280122995376587,
"mean": 0.0016747020417824388,
"std": 0.047436561435461044,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.27260690927505493,
"max": 0.03427213430404663,
"mean": -0.04665624350309372,
"std": 0.04072800651192665,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.9236066937446594,
"max": 0.9658545255661011,
"mean": 0.0010218569077551365,
"std": 0.04070160537958145,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.14540822803974152,
"max": 0.07539817690849304,
"mean": -0.009104669094085693,
"std": 0.025749636813998222,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.23975443840026855,
"max": 0.7185607552528381,
"mean": 0.44753360748291016,
"std": 0.06007208302617073,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.2746535837650299,
"max": 0.2996414601802826,
"mean": 8.662165782880038e-06,
"std": 0.03547052666544914,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.11975187063217163,
"max": 0.11919566243886948,
"mean": 0.0007501145591959357,
"std": 0.02767573855817318,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.2831306457519531,
"max": 0.2817768156528473,
"mean": -7.67814417486079e-05,
"std": 0.035099856555461884,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.5266785621643066,
"max": 2.5387556552886963,
"mean": 0.026949256658554077,
"std": 0.5885584354400635,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.22260574996471405,
"max": 0.2732996642589569,
"mean": 2.9508364605135284e-06,
"std": 0.030731212347745895,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.0335291288793087,
"max": 0.031390510499477386,
"mean": 0.00011758864275179803,
"std": 0.012400473468005657,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.23621369898319244,
"max": 0.23289528489112854,
"mean": 5.6726221373537555e-05,
"std": 0.025696825236082077,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.13667543232440948,
"max": 0.12879958748817444,
"mean": -0.005504202097654343,
"std": 0.040019236505031586,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.35455986857414246,
"max": 1.1826062202453613,
"mean": 0.7107979655265808,
"std": 0.10437346249818802,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.6191003918647766,
"max": 0.5564218759536743,
"mean": 0.0011606740299612284,
"std": 0.04611353576183319,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.19018200039863586,
"max": 0.02485579438507557,
"mean": -0.03489173576235771,
"std": 0.028727849945425987,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.1358468532562256,
"max": 0.9746898412704468,
"mean": 0.00035939598456025124,
"std": 0.04234171286225319,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.6019405722618103,
"max": 0.06334464251995087,
"mean": -0.00488577876240015,
"std": 0.028712771832942963,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.3755652904510498,
"max": 0.9507709741592407,
"mean": 0.5931843519210815,
"std": 0.0686625987291336,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.3929532766342163,
"max": 0.37091946601867676,
"mean": 7.025484228506684e-05,
"std": 0.03718522936105728,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11978376656770706,
"max": 0.13744011521339417,
"mean": 0.0009335688664577901,
"std": 0.029282478615641594,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.6229383945465088,
"max": 0.5121926069259644,
"mean": 1.5349294699262828e-05,
"std": 0.03643808513879776,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.242501258850098,
"max": 8.848700523376465,
"mean": -0.10966195166110992,
"std": 1.7074756622314453,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.2780378460884094,
"max": 0.24072492122650146,
"mean": 5.223074913374148e-05,
"std": 0.03261224925518036,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.05211928114295006,
"max": 0.03976155444979668,
"mean": 9.01424209587276e-05,
"std": 0.012970111332833767,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.23169712722301483,
"max": 0.23602090775966644,
"mean": -2.2195828933035955e-05,
"std": 0.029388954862952232,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.20550638437271118,
"max": 0.10590175539255142,
"mean": -0.004026752896606922,
"std": 0.03266817331314087,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.3396901488304138,
"max": 1.022835612297058,
"mean": 0.7008680701255798,
"std": 0.09710492938756943,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5683938264846802,
"max": 0.8381193280220032,
"mean": 0.00041519341175444424,
"std": 0.04229409247636795,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.21325451135635376,
"max": 0.03037591464817524,
"mean": -0.03223013877868652,
"std": 0.026610074564814568,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7600710391998291,
"max": 0.7236490845680237,
"mean": -1.6499760022270493e-05,
"std": 0.03683502599596977,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.26496192812919617,
"max": 0.10684733092784882,
"mean": -0.0030161943286657333,
"std": 0.028908496722579002,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.28418251872062683,
"max": 0.7011516094207764,
"mean": 0.499736487865448,
"std": 0.047200758010149,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.28040796518325806,
"max": 0.23536527156829834,
"mean": -0.00011076986265834421,
"std": 0.03875643387436867,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.15493866801261902,
"max": 0.12730616331100464,
"mean": -0.002237653825432062,
"std": 0.03343982622027397,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.4170800745487213,
"max": 0.6621686220169067,
"mean": -1.8650103811523877e-05,
"std": 0.039095137268304825,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.2626214027404785,
"max": 4.750005722045898,
"mean": -0.020378686487674713,
"std": 1.0105632543563843,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.24659502506256104,
"max": 0.2085939198732376,
"mean": 4.402307604323141e-05,
"std": 0.033962100744247437,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.03477818891406059,
"max": 0.045115940272808075,
"mean": -1.805905776564032e-05,
"std": 0.012638943269848824,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.20247574150562286,
"max": 0.20785965025424957,
"mean": -2.8977701731491834e-05,
"std": 0.031019993126392365,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.2010650485754013,
"max": 0.11400442570447922,
"mean": -0.002901929896324873,
"std": 0.03455876186490059,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.3669453561306,
"max": 1.068376898765564,
"mean": 0.6706770658493042,
"std": 0.06678663939237595,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.4009625017642975,
"max": 0.5047707557678223,
"mean": -3.825509702437557e-05,
"std": 0.04113015532493591,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.12967447936534882,
"max": 0.026864072307944298,
"mean": -0.03057170659303665,
"std": 0.021967768669128418,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.4517863094806671,
"max": 0.4363614320755005,
"mean": 7.544152322225273e-05,
"std": 0.03489035367965698,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.2692056894302368,
"max": 0.07339853048324585,
"mean": -0.0010960557265207171,
"std": 0.023164359852671623,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.2873815894126892,
"max": 0.6924071311950684,
"mean": 0.5248355865478516,
"std": 0.048200905323028564,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.22408804297447205,
"max": 0.22555872797966003,
"mean": 1.55975158122601e-05,
"std": 0.038948412984609604,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.13717913627624512,
"max": 0.10996447503566742,
"mean": 0.00024089610087685287,
"std": 0.02930767834186554,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.37717288732528687,
"max": 0.43975257873535156,
"mean": -9.77939271251671e-06,
"std": 0.03928566351532936,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.868288516998291,
"max": 5.028470516204834,
"mean": 0.009761041030287743,
"std": 0.8478302955627441,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.22423577308654785,
"max": 0.221679225564003,
"mean": -3.3901324059115723e-07,
"std": 0.034409064799547195,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.0438535250723362,
"max": 0.03604500740766525,
"mean": -0.00025803165044635534,
"std": 0.0120812077075243,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.2146783322095871,
"max": 0.1904102861881256,
"mean": -1.7072843547794037e-05,
"std": 0.03153547644615173,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.18190543353557587,
"max": 0.12149464339017868,
"mean": -0.0023945681750774384,
"std": 0.04129800572991371,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.4226498305797577,
"max": 0.9518083333969116,
"mean": 0.6629198789596558,
"std": 0.057358019053936005,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.372251033782959,
"max": 0.47781607508659363,
"mean": -8.197914576157928e-05,
"std": 0.040889132767915726,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.20997951924800873,
"max": 0.027235740795731544,
"mean": -0.030272582545876503,
"std": 0.021444976329803467,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.34334975481033325,
"max": 0.7389779686927795,
"mean": 8.186099876184016e-05,
"std": 0.034765809774398804,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.2415534406900406,
"max": 0.050704218447208405,
"mean": -0.001192720839753747,
"std": 0.02049700915813446,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.3061361312866211,
"max": 0.6592679023742676,
"mean": 0.5253557562828064,
"std": 0.04659049212932587,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.3061867356300354,
"max": 0.2188880741596222,
"mean": 7.013476715655997e-05,
"std": 0.03949468210339546,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.15020529925823212,
"max": 0.13198836147785187,
"mean": 0.00033842536504380405,
"std": 0.030562784522771835,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.25926315784454346,
"max": 0.20377042889595032,
"mean": 3.10853029077407e-05,
"std": 0.039484549313783646,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.3498988151550293,
"max": 2.389754056930542,
"mean": -0.02631671540439129,
"std": 0.4510843753814697,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.19007518887519836,
"max": 0.2122075855731964,
"mean": 3.708741132868454e-05,
"std": 0.03479320555925369,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.03199063614010811,
"max": 0.03580143302679062,
"mean": -0.00019849740783683956,
"std": 0.012292149476706982,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.19011414051055908,
"max": 0.17155633866786957,
"mean": -6.832154031144455e-05,
"std": 0.0321698896586895,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.14033056795597076,
"max": 0.13829410076141357,
"mean": -0.0025126286782324314,
"std": 0.05131656676530838,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.4672001600265503,
"max": 0.9642724394798279,
"mean": 0.6692001819610596,
"std": 0.05353807285428047,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.32512417435646057,
"max": 0.3099176585674286,
"mean": -8.536699169781059e-07,
"std": 0.04094506427645683,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.12580342590808868,
"max": 0.025558948516845703,
"mean": -0.030726371333003044,
"std": 0.019892578944563866,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.44301649928092957,
"max": 0.448657363653183,
"mean": 9.49525274336338e-05,
"std": 0.03511860594153404,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.22610187530517578,
"max": 0.0521467961370945,
"mean": -0.0011865891283378005,
"std": 0.018514476716518402,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.3391834497451782,
"max": 0.7460214495658875,
"mean": 0.5588462352752686,
"std": 0.04179359972476959,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.2743752598762512,
"max": 0.27987486124038696,
"mean": 2.0352064893813804e-05,
"std": 0.04105662927031517,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.13770411908626556,
"max": 0.14076648652553558,
"mean": 0.0004916964680887759,
"std": 0.026698192581534386,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.4935597777366638,
"max": 0.3583414554595947,
"mean": 8.887881995178759e-05,
"std": 0.04069438576698303,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.311286687850952,
"max": 1.7559641599655151,
"mean": -0.02118358016014099,
"std": 0.5012499094009399,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.2191997468471527,
"max": 0.19883301854133606,
"mean": -4.048732444061898e-05,
"std": 0.03423238918185234,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.041594695299863815,
"max": 0.039164409041404724,
"mean": -0.00013954236055724323,
"std": 0.012892705388367176,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.17905071377754211,
"max": 0.18448761105537415,
"mean": 4.79043010273017e-05,
"std": 0.03155573084950447,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.1810525357723236,
"max": 0.18478283286094666,
"mean": -0.0022157104685902596,
"std": 0.054884668439626694,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.47422513365745544,
"max": 1.034525752067566,
"mean": 0.6455625891685486,
"std": 0.05127067118883133,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.2727859616279602,
"max": 0.31039154529571533,
"mean": 0.00011223299225093797,
"std": 0.04068140313029289,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.10606198012828827,
"max": 0.026645641773939133,
"mean": -0.02954702451825142,
"std": 0.01799139380455017,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.34065425395965576,
"max": 0.33199548721313477,
"mean": 5.238396261120215e-05,
"std": 0.034412581473588943,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.18290212750434875,
"max": 0.042540330439805984,
"mean": -0.001063595642335713,
"std": 0.017244886606931686,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.32540637254714966,
"max": 0.6927012801170349,
"mean": 0.511530876159668,
"std": 0.037588104605674744,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.23500792682170868,
"max": 0.22661413252353668,
"mean": -3.6375215131556615e-05,
"std": 0.039175912737846375,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.11630432307720184,
"max": 0.1327952891588211,
"mean": 0.00015614689618814737,
"std": 0.02927626110613346,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.35499081015586853,
"max": 0.28717586398124695,
"mean": 7.152914804464672e-06,
"std": 0.03924452140927315,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.1564154624938965,
"max": 3.564419746398926,
"mean": -0.011666063219308853,
"std": 0.6851950883865356,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.21194273233413696,
"max": 0.21046526730060577,
"mean": 3.472749813226983e-05,
"std": 0.0344846174120903,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.03606359288096428,
"max": 0.0485043041408062,
"mean": 0.0007934037130326033,
"std": 0.01287116389721632,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.21187099814414978,
"max": 0.19423909485340118,
"mean": -1.3818132629239699e-06,
"std": 0.03169572353363037,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.1876450628042221,
"max": 0.1781487911939621,
"mean": -0.0028378514107316732,
"std": 0.05868522822856903,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.4746300280094147,
"max": 1.0532299280166626,
"mean": 0.6519026756286621,
"std": 0.0511440671980381,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.24888233840465546,
"max": 0.329919695854187,
"mean": 0.00018074009858537465,
"std": 0.04056980833411217,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.1257043331861496,
"max": 0.024808209389448166,
"mean": -0.03052573651075363,
"std": 0.01766115613281727,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.4241631031036377,
"max": 0.48552921414375305,
"mean": -1.5207942851702683e-06,
"std": 0.03539673238992691,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.15242178738117218,
"max": 0.0436730720102787,
"mean": 4.8590598453301936e-05,
"std": 0.01490879151970148,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.3154313564300537,
"max": 0.68807452917099,
"mean": 0.5530612468719482,
"std": 0.041024595499038696,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.20784315466880798,
"max": 0.22137802839279175,
"mean": 3.199603088432923e-05,
"std": 0.038299061357975006,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.13870200514793396,
"max": 0.11339821666479111,
"mean": 2.9128044843673706e-05,
"std": 0.025894545018672943,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.4055723249912262,
"max": 0.37375950813293457,
"mean": 2.5988052584580146e-05,
"std": 0.038179732859134674,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.7928740978240967,
"max": 2.885420560836792,
"mean": 0.0012225983664393425,
"std": 0.5186418294906616,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.20435325801372528,
"max": 0.1985306441783905,
"mean": 2.9608720069518313e-05,
"std": 0.03429684415459633,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.051018889993429184,
"max": 0.040129613131284714,
"mean": -0.00042048803879879415,
"std": 0.013424505479633808,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.19798687100410461,
"max": 0.20356523990631104,
"mean": -1.2490939298004378e-05,
"std": 0.03180477395653725,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.1941322237253189,
"max": 0.19617649912834167,
"mean": -0.002969961380586028,
"std": 0.06259642541408539,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.3487941026687622,
"max": 1.0952281951904297,
"mean": 0.6676215529441833,
"std": 0.05664284899830818,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22712087631225586,
"max": 0.25315943360328674,
"mean": 0.00035851544816978276,
"std": 0.04075949266552925,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.09184330701828003,
"max": 0.04372864216566086,
"mean": -0.030109990388154984,
"std": 0.017667723819613457,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.35518717765808105,
"max": 0.30635109543800354,
"mean": -4.3967633246211335e-05,
"std": 0.037122078239917755,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.16265232861042023,
"max": 0.06366349011659622,
"mean": -8.268894453067333e-05,
"std": 0.019441038370132446,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.3488224744796753,
"max": 0.7298842668533325,
"mean": 0.5426357388496399,
"std": 0.039679452776908875,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.22033143043518066,
"max": 0.22433431446552277,
"mean": -1.1077730960096233e-05,
"std": 0.03923030197620392,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.11923559010028839,
"max": 0.1716114580631256,
"mean": 0.00028718815883621573,
"std": 0.025185901671648026,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.2481980323791504,
"max": 0.3025566339492798,
"mean": -3.676430060295388e-05,
"std": 0.0389297790825367,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.5254225730895996,
"max": 3.736085891723633,
"mean": 0.01585158333182335,
"std": 0.7859480977058411,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.21972878277301788,
"max": 0.23833929002285004,
"mean": -1.325977427768521e-05,
"std": 0.03630264848470688,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.04748326912522316,
"max": 0.051650550216436386,
"mean": 0.0004778398433700204,
"std": 0.01352317538112402,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.21533912420272827,
"max": 0.21868844330310822,
"mean": 5.647652506013401e-05,
"std": 0.03361491113901138,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.21255744993686676,
"max": 0.23268213868141174,
"mean": -0.005099742207676172,
"std": 0.06193498894572258,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.36217188835144043,
"max": 1.112847089767456,
"mean": 0.69975745677948,
"std": 0.05501763895153999,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.23635224997997284,
"max": 0.24658624827861786,
"mean": 0.00046343228314071894,
"std": 0.041268426924943924,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.09862525016069412,
"max": 0.06863635033369064,
"mean": -0.03145936504006386,
"std": 0.018182674422860146,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.30422019958496094,
"max": 0.3540525734424591,
"mean": -8.221832831623033e-05,
"std": 0.04027421772480011,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.1533002257347107,
"max": 0.150687575340271,
"mean": 0.00025470374384894967,
"std": 0.023078717291355133,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 0.9982896447181702,
"max": 1.017301082611084,
"mean": 1.0001298189163208,
"std": 0.0026745295617729425,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.031271953135728836,
"max": 0.03127208724617958,
"mean": -1.929010068124626e-05,
"std": 0.01804104819893837,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.03122810088098049,
"max": 0.030984606593847275,
"mean": -0.0010841733310371637,
"std": 0.0179507527500391,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.03126660734415054,
"max": 0.03127255663275719,
"mean": 3.5378593565837946e-06,
"std": 0.018041487783193588,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.031172683462500572,
"max": 0.031167395412921906,
"mean": 0.0003339074901305139,
"std": 0.01806284487247467,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": -0.0006182725192047656,
"max": 0.0004164598067291081,
"mean": 1.3710750863538124e-06,
"std": 0.0001378587185172364,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 0.9979904890060425,
"max": 1.0161197185516357,
"mean": 1.0013301372528076,
"std": 0.004817315377295017,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.032745394855737686,
"max": 0.03283839672803879,
"mean": -6.682760158582823e-06,
"std": 0.018042659386992455,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.03276297450065613,
"max": 0.0325884111225605,
"mean": -0.00013115988986101002,
"std": 0.017956366762518883,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": -0.0011839725775644183,
"max": 0.0011610303772613406,
"mean": 3.635812220181833e-07,
"std": 0.00021423342695925385,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": -0.0005281989579088986,
"max": 0.0004011568380519748,
"mean": 2.2640601855528075e-06,
"std": 0.00012689748837146908,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.383169025182724,
"max": 0.725769579410553,
"mean": 0.5810222625732422,
"std": 0.039563409984111786,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.23967966437339783,
"max": 0.19745716452598572,
"mean": 2.6129977413802408e-05,
"std": 0.0374654158949852,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.1195952445268631,
"max": 0.16743028163909912,
"mean": 0.0009849121561273932,
"std": 0.02763625606894493,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.24753768742084503,
"max": 0.502853274345398,
"mean": -4.9970258260145783e-05,
"std": 0.0376228392124176,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.9648470878601074,
"max": 3.7909820079803467,
"mean": -0.0036168191581964493,
"std": 0.6834573745727539,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.22818903625011444,
"max": 0.25305306911468506,
"mean": -1.1425543561927043e-05,
"std": 0.037434399127960205,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.07215739786624908,
"max": 0.08118511736392975,
"mean": -0.0005145666655153036,
"std": 0.015683691948652267,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.2285011112689972,
"max": 0.25927454233169556,
"mean": -2.8810776711907238e-05,
"std": 0.03542128577828407,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.20174317061901093,
"max": 0.21631476283073425,
"mean": -0.005539278965443373,
"std": 0.06842140108346939,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.4053976237773895,
"max": 1.1997506618499756,
"mean": 0.7383711338043213,
"std": 0.05650194734334946,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.2226068526506424,
"max": 0.24658025801181793,
"mean": 0.0005210487288422883,
"std": 0.04133579134941101,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.10394058376550674,
"max": 0.02423257753252983,
"mean": -0.032700441777706146,
"std": 0.018963389098644257,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.452515184879303,
"max": 0.4254130423069,
"mean": -0.0004341741732787341,
"std": 0.04689616709947586,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.25287455320358276,
"max": 0.4728158116340637,
"mean": 0.003204880515113473,
"std": 0.04463134706020355,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.31750747561454773,
"max": 0.333750456571579,
"mean": -2.5235824068658985e-05,
"std": 0.021287381649017334,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.3244800865650177,
"max": 0.6913307905197144,
"mean": 0.5712176561355591,
"std": 0.045165594667196274,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.16547071933746338,
"max": 0.1755398064851761,
"mean": -4.8899608373176306e-05,
"std": 0.033180754631757736,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.18801826238632202,
"max": 0.1438588947057724,
"mean": 4.4942658860236406e-05,
"std": 0.029767248779535294,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.38313359022140503,
"max": 0.24818716943264008,
"mean": -9.953633707482368e-06,
"std": 0.03276177868247032,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.6768205165863037,
"max": 3.3089771270751953,
"mean": -0.014381470158696175,
"std": 0.9868160486221313,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.23584222793579102,
"max": 0.24873286485671997,
"mean": -1.8046124750981107e-05,
"std": 0.0416971780359745,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.07315867394208908,
"max": 0.15554027259349823,
"mean": 0.0006676731863990426,
"std": 0.02520027756690979,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.2670559585094452,
"max": 0.24887487292289734,
"mean": -1.537521166028455e-05,
"std": 0.04013797268271446,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.1908693015575409,
"max": 0.1960526406764984,
"mean": -0.001238689525052905,
"std": 0.06672189384698868,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.3290148973464966,
"max": 1.0089884996414185,
"mean": 0.719682514667511,
"std": 0.053548477590084076,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.23323918879032135,
"max": 0.2469726949930191,
"mean": 0.00018311971507500857,
"std": 0.04089980572462082,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11503507941961288,
"max": 0.019024236127734184,
"mean": -0.04251422733068466,
"std": 0.018931886181235313,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.3927544355392456,
"max": 0.4104294776916504,
"mean": -2.164382931368891e-05,
"std": 0.04853343218564987,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.6971645355224609,
"max": 0.414955198764801,
"mean": 0.0008486253209412098,
"std": 0.060451194643974304,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": -0.001029345323331654,
"max": 1.0005033016204834,
"mean": 0.00048820505617186427,
"std": 0.022088995203375816,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 0.99776691198349,
"max": 1.0153907537460327,
"mean": 0.9997058510780334,
"std": 0.0012300637317821383,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.031274545937776566,
"max": 0.03127707168459892,
"mean": -2.1027797629358247e-05,
"std": 0.018032420426607132,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.031217729672789574,
"max": 0.031233638525009155,
"mean": -0.0006770637119188905,
"std": 0.017827108502388,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.03128187730908394,
"max": 0.031268589198589325,
"mean": -8.834878826746717e-06,
"std": 0.018031446263194084,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.031228115782141685,
"max": 0.03124588541686535,
"mean": -0.0007299837889149785,
"std": 0.017942119389772415,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": -0.0004204909782856703,
"max": 0.00033413738128729165,
"mean": -3.152099679937237e-06,
"std": 0.0001164414279628545,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 0.997612476348877,
"max": 1.018494963645935,
"mean": 1.0012025833129883,
"std": 0.0055990261025726795,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.032435424625873566,
"max": 0.032380323857069016,
"mean": -1.7302188553003361e-06,
"std": 0.018027864396572113,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.032131362706422806,
"max": 0.031162748113274574,
"mean": -0.00037396998959593475,
"std": 0.01804373785853386,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": -0.0012890547513961792,
"max": 0.001122222631238401,
"mean": -8.950937626650557e-07,
"std": 0.00020965519070159644,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": -0.00034396781120449305,
"max": 0.00029873003950342536,
"mean": -3.7820796023879666e-06,
"std": 0.000104848513728939,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.2348298579454422,
"max": 0.27300530672073364,
"mean": 6.816113909735577e-06,
"std": 0.018809327855706215,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.3214486837387085,
"max": 0.7001691460609436,
"mean": 0.5819005370140076,
"std": 0.04646027460694313,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.18254612386226654,
"max": 0.19860517978668213,
"mean": -1.1607673513935879e-05,
"std": 0.03318353369832039,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.1615392416715622,
"max": 0.13018541038036346,
"mean": -0.001078265719115734,
"std": 0.03421453759074211,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.33349576592445374,
"max": 0.31233182549476624,
"mean": -1.0118232239619829e-05,
"std": 0.032234255224466324,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.8480448722839355,
"max": 8.8128080368042,
"mean": 0.09380069375038147,
"std": 1.6259617805480957,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.23474065959453583,
"max": 0.24273009598255157,
"mean": 4.155310307396576e-05,
"std": 0.04085606709122658,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.07642843574285507,
"max": 0.06617211550474167,
"mean": 0.0004827451193705201,
"std": 0.01944047026336193,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.24762944877147675,
"max": 0.2358739972114563,
"mean": -3.232937160646543e-06,
"std": 0.03943068906664848,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.16411840915679932,
"max": 0.1619885265827179,
"mean": 0.001625007251277566,
"std": 0.06529368460178375,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.5569814443588257,
"max": 0.9541290402412415,
"mean": 0.7133999466896057,
"std": 0.04144103080034256,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.22980599105358124,
"max": 0.2567155957221985,
"mean": -4.5827197027392685e-05,
"std": 0.04057452455163002,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.13575804233551025,
"max": 0.02213761769235134,
"mean": -0.04138356074690819,
"std": 0.01845938339829445,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.4245927333831787,
"max": 0.39355969429016113,
"mean": -4.580877430271357e-06,
"std": 0.04778376594185829,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.6110193133354187,
"max": 0.6553415656089783,
"mean": 0.001590792671777308,
"std": 0.056976497173309326,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.251875638961792,
"max": 0.3209821879863739,
"mean": -6.120833859313279e-06,
"std": 0.019612718373537064,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.35964423418045044,
"max": 0.6887573599815369,
"mean": 0.5708860754966736,
"std": 0.04330369085073471,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.2213190197944641,
"max": 0.17759515345096588,
"mean": -3.466910129645839e-05,
"std": 0.03429858386516571,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.16418921947479248,
"max": 0.23438312113285065,
"mean": 0.0003640234936028719,
"std": 0.03290766850113869,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.2654394805431366,
"max": 0.24140575528144836,
"mean": -5.2719900850206614e-05,
"std": 0.03389739617705345,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.882589817047119,
"max": 5.12019157409668,
"mean": 0.04409287869930267,
"std": 1.233181118965149,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.2474043071269989,
"max": 0.2517080307006836,
"mean": 7.239622209453955e-05,
"std": 0.0439867228269577,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.0629691556096077,
"max": 0.054786957800388336,
"mean": 0.0006426851614378393,
"std": 0.017202140763401985,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.28832921385765076,
"max": 0.2730186879634857,
"mean": -5.011680332245305e-05,
"std": 0.04298482462763786,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.16195480525493622,
"max": 0.1713690608739853,
"mean": -0.002885536290705204,
"std": 0.05930813401937485,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.5195947885513306,
"max": 0.9433215260505676,
"mean": 0.713985800743103,
"std": 0.0396861806511879,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.23872706294059753,
"max": 0.24947769939899445,
"mean": 0.000464944401755929,
"std": 0.04045351594686508,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.14595000445842743,
"max": 0.041102174669504166,
"mean": -0.03972803056240082,
"std": 0.020616797730326653,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.5366718769073486,
"max": 0.5868415236473083,
"mean": 5.812449671793729e-06,
"std": 0.04885939508676529,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5220040678977966,
"max": 0.4962327182292938,
"mean": 0.0023680159356445074,
"std": 0.05358637124300003,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.2740743160247803,
"max": 0.31590986251831055,
"mean": 1.968129254237283e-06,
"std": 0.02004937082529068,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.36616218090057373,
"max": 0.718187689781189,
"mean": 0.5934113264083862,
"std": 0.04643949121236801,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.21206998825073242,
"max": 0.20034025609493256,
"mean": 3.0636681913165376e-05,
"std": 0.03486590087413788,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.18825751543045044,
"max": 0.20496514439582825,
"mean": 0.000955467636231333,
"std": 0.03160287067294121,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.2913488745689392,
"max": 0.34160566329956055,
"mean": -4.710702705779113e-05,
"std": 0.03458679839968681,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.8994882106781006,
"max": 3.406729221343994,
"mean": 0.014544591307640076,
"std": 0.8605263829231262,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.2257968783378601,
"max": 0.2514858543872833,
"mean": -3.6003511922899634e-06,
"std": 0.042229436337947845,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.055651042610406876,
"max": 0.04694758728146553,
"mean": -1.666278694756329e-05,
"std": 0.015861017629504204,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.2935076653957367,
"max": 0.2909187078475952,
"mean": -7.359203209489351e-06,
"std": 0.04194429889321327,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.12573029100894928,
"max": 0.2607214152812958,
"mean": -0.003240898484364152,
"std": 0.05319065600633621,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.45657190680503845,
"max": 0.8538610339164734,
"mean": 0.7059471011161804,
"std": 0.03630220517516136,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.5123063325881958,
"max": 0.3483346104621887,
"mean": 0.00034276110818609595,
"std": 0.04019864276051521,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.18701618909835815,
"max": 0.03957710787653923,
"mean": -0.03942158818244934,
"std": 0.021421542391180992,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.5481660962104797,
"max": 0.5603045225143433,
"mean": -7.152351463446394e-05,
"std": 0.050734106451272964,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.5146781802177429,
"max": 0.6680049300193787,
"mean": 0.002443398116156459,
"std": 0.04963434487581253,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.3329000473022461,
"max": 0.2665855884552002,
"mean": 3.3853375498438254e-06,
"std": 0.01938658207654953,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.32180243730545044,
"max": 0.7734456062316895,
"mean": 0.6512116193771362,
"std": 0.04565456882119179,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.2506096363067627,
"max": 0.2205670177936554,
"mean": -2.243723429273814e-06,
"std": 0.0365004725754261,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.32875651121139526,
"max": 0.28859665989875793,
"mean": -0.0006945514469407499,
"std": 0.03869060054421425,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.31226253509521484,
"max": 0.3726266324520111,
"mean": 6.49260327918455e-05,
"std": 0.03624095767736435,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.75054407119751,
"max": 5.848582744598389,
"mean": 0.0380375012755394,
"std": 1.4184556007385254,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.22316591441631317,
"max": 0.2069820612668991,
"mean": -7.529938011430204e-05,
"std": 0.042484965175390244,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.07815916836261749,
"max": 0.051765959709882736,
"mean": -0.0009295076015405357,
"std": 0.016425304114818573,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.3312581181526184,
"max": 0.3296850621700287,
"mean": -4.723461188405054e-06,
"std": 0.04279135540127754,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.2866402864456177,
"max": 0.11266554147005081,
"mean": -0.0012074881233274937,
"std": 0.04703830927610397,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.4860897958278656,
"max": 0.8950455784797668,
"mean": 0.7378093004226685,
"std": 0.039171766489744186,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.3630240857601166,
"max": 0.2759678065776825,
"mean": 5.1290608098497614e-05,
"std": 0.04064415767788887,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.2490600198507309,
"max": 0.04639717563986778,
"mean": -0.03930266201496124,
"std": 0.023369962349534035,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.6307172775268555,
"max": 0.6014147996902466,
"mean": -6.16723409621045e-05,
"std": 0.05311626195907593,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.7142688035964966,
"max": 0.267661988735199,
"mean": 0.0009166492964141071,
"std": 0.051358189433813095,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.3435579240322113,
"max": 0.3038428723812103,
"mean": 1.3023259270994458e-07,
"std": 0.019134989008307457,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.3500676155090332,
"max": 0.7897790670394897,
"mean": 0.6390184760093689,
"std": 0.04962107539176941,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.2066265493631363,
"max": 0.20817363262176514,
"mean": -5.989617056911811e-05,
"std": 0.037695348262786865,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.2602774202823639,
"max": 0.2698180377483368,
"mean": -0.00039462913991883397,
"std": 0.04474588483572006,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.3561350107192993,
"max": 0.32447537779808044,
"mean": -6.916588063177187e-06,
"std": 0.03720375522971153,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.291650295257568,
"max": 4.228523254394531,
"mean": -0.02643691562116146,
"std": 1.0099413394927979,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.2399577796459198,
"max": 0.24472706019878387,
"mean": -2.5193990950356238e-05,
"std": 0.04320961609482765,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.06267981976270676,
"max": 0.05705071985721588,
"mean": 0.0003437635023146868,
"std": 0.014168186113238335,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.4376278221607208,
"max": 0.3739663064479828,
"mean": 1.456045083614299e-05,
"std": 0.04412108287215233,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.09702851623296738,
"max": 0.17698785662651062,
"mean": -0.0006597189931198955,
"std": 0.03517333045601845,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.4217059910297394,
"max": 1.0791560411453247,
"mean": 0.7486134767532349,
"std": 0.04263925552368164,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.26739904284477234,
"max": 0.298541396856308,
"mean": -7.951692532515153e-05,
"std": 0.040804121643304825,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.18641552329063416,
"max": 0.043663352727890015,
"mean": -0.036861587315797806,
"std": 0.0257096104323864,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.4583725333213806,
"max": 0.4902479946613312,
"mean": 4.34339017374441e-05,
"std": 0.05420944094657898,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.2883600890636444,
"max": 0.5551440119743347,
"mean": -0.0008822724921628833,
"std": 0.04795018211007118,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.2930268347263336,
"max": 0.3230960965156555,
"mean": 6.1333103076322e-06,
"std": 0.01996854692697525,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.29084402322769165,
"max": 0.768223226070404,
"mean": 0.650917649269104,
"std": 0.05231805518269539,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.24454748630523682,
"max": 0.2624610364437103,
"mean": -5.949783371761441e-06,
"std": 0.039611514657735825,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.2689764201641083,
"max": 0.20118767023086548,
"mean": -0.000883190892636776,
"std": 0.05189211666584015,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.27367931604385376,
"max": 0.25521987676620483,
"mean": 4.683277438743971e-06,
"std": 0.038708530366420746,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -13.039263725280762,
"max": 16.03864097595215,
"mean": 0.03343699499964714,
"std": 1.9974913597106934,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.2084328532218933,
"max": 0.2273532599210739,
"mean": -7.200734398793429e-05,
"std": 0.040553417056798935,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.06970705837011337,
"max": 0.06357143819332123,
"mean": 0.00015784359129611403,
"std": 0.014761138707399368,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.46569308638572693,
"max": 0.3209618628025055,
"mean": 1.970405901374761e-05,
"std": 0.04058854654431343,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.06452719122171402,
"max": 0.11591468751430511,
"mean": 0.0011942506534978747,
"std": 0.024729805067181587,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.37459689378738403,
"max": 0.9426000118255615,
"mean": 0.7511058449745178,
"std": 0.040696173906326294,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.2817957103252411,
"max": 0.27507483959198,
"mean": -0.00016845125355757773,
"std": 0.040994707494974136,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.19982005655765533,
"max": 0.05116043612360954,
"mean": -0.03206067159771919,
"std": 0.025184709578752518,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.6629015207290649,
"max": 0.5394555330276489,
"mean": -4.886999522568658e-05,
"std": 0.052846018224954605,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.1941312849521637,
"max": 0.5856620669364929,
"mean": -0.0005102052818983793,
"std": 0.04117872565984726,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.41802144050598145,
"max": 0.37218335270881653,
"mean": 6.143730843177764e-06,
"std": 0.021620716899633408,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.214231476187706,
"max": 0.7551652193069458,
"mean": 0.6496015787124634,
"std": 0.05449988320469856,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.21102380752563477,
"max": 0.19707706570625305,
"mean": 4.027696923003532e-05,
"std": 0.03946160152554512,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.3312985599040985,
"max": 0.2609282433986664,
"mean": -0.0032433252781629562,
"std": 0.05640969052910805,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.20687410235404968,
"max": 0.25594964623451233,
"mean": 5.426290590548888e-05,
"std": 0.038564227521419525,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.281450271606445,
"max": 6.974554538726807,
"mean": 0.04850253462791443,
"std": 1.3900896310806274,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.2110043168067932,
"max": 0.23172873258590698,
"mean": -5.136051640874939e-06,
"std": 0.04131242260336876,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.04407680407166481,
"max": 0.03620957210659981,
"mean": 5.837064236402512e-07,
"std": 0.012804933823645115,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.3980613648891449,
"max": 0.34518715739250183,
"mean": -5.568802953348495e-05,
"std": 0.04238880053162575,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.0554049089550972,
"max": 0.06314343214035034,
"mean": 0.00036526317126117647,
"std": 0.01868700049817562,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.35041460394859314,
"max": 1.054603099822998,
"mean": 0.7895448207855225,
"std": 0.04915067180991173,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.33399659395217896,
"max": 0.3868362009525299,
"mean": -0.00016958778724074364,
"std": 0.04147977754473686,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.15840038657188416,
"max": 0.059087082743644714,
"mean": -0.03186880797147751,
"std": 0.02521045319736004,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6981510519981384,
"max": 0.47227516770362854,
"mean": -8.876612992025912e-05,
"std": 0.05179238319396973,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.2498706579208374,
"max": 0.33086034655570984,
"mean": -0.0002500821719877422,
"std": 0.04153008759021759,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.2874675989151001,
"max": 0.3506753444671631,
"mean": -2.142998255294515e-06,
"std": 0.024235961958765984,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.19644968211650848,
"max": 0.7875264883041382,
"mean": 0.6702861189842224,
"std": 0.058757346123456955,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.2307407557964325,
"max": 0.23255716264247894,
"mean": -1.9847611838486046e-05,
"std": 0.04043736308813095,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.22115467488765717,
"max": 0.24231739342212677,
"mean": 0.0007812330732122064,
"std": 0.05595459043979645,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.21687255799770355,
"max": 0.22770829498767853,
"mean": -7.165952411014587e-05,
"std": 0.03937350586056709,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.959362030029297,
"max": 9.123239517211914,
"mean": -0.0011855876073241234,
"std": 1.8560608625411987,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.2711891233921051,
"max": 0.2605840563774109,
"mean": 4.364762571640313e-05,
"std": 0.038405757397413254,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.05802099406719208,
"max": 0.05812212452292442,
"mean": 0.0003513882402330637,
"std": 0.014736738055944443,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.26627787947654724,
"max": 0.28912854194641113,
"mean": -6.142335041658953e-05,
"std": 0.03907188028097153,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.04412651062011719,
"max": 0.03752894699573517,
"mean": -9.05310153029859e-05,
"std": 0.013374187983572483,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.339313268661499,
"max": 1.1022799015045166,
"mean": 0.8638956546783447,
"std": 0.06418420374393463,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.42381733655929565,
"max": 0.41949865221977234,
"mean": 0.0003125929506495595,
"std": 0.04350028932094574,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.2159820944070816,
"max": 0.1717892736196518,
"mean": -0.02952037751674652,
"std": 0.0320223867893219,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.6032647490501404,
"max": 0.5633653998374939,
"mean": -0.00015064005856402218,
"std": 0.053445085883140564,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17956292629241943,
"max": 0.37900540232658386,
"mean": 0.0013650960754603148,
"std": 0.03737950697541237,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.3949747383594513,
"max": 0.36959531903266907,
"mean": 3.693038524943404e-05,
"std": 0.028617311269044876,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.2902548313140869,
"max": 0.835411548614502,
"mean": 0.7055742740631104,
"std": 0.06795050203800201,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.9264549016952515,
"max": 1.0266518592834473,
"mean": -2.6062916731461883e-05,
"std": 0.047624703496694565,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.8848392963409424,
"max": 0.8210154175758362,
"mean": -0.00031388079514726996,
"std": 0.09599340707063675,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.2704119086265564,
"max": 0.24200940132141113,
"mean": -2.2776041078031994e-05,
"std": 0.03895159065723419,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.909391403198242,
"max": 23.011491775512695,
"mean": -0.09215216338634491,
"std": 4.095620155334473,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.2288811355829239,
"max": 0.24590590596199036,
"mean": -2.564151509432122e-05,
"std": 0.03863710165023804,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.060657572001218796,
"max": 0.04613931104540825,
"mean": -0.00014338521577883512,
"std": 0.014703062362968922,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.33906009793281555,
"max": 0.37649407982826233,
"mean": 7.5478201324585825e-06,
"std": 0.04081288352608681,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.04671977460384369,
"max": 0.19674423336982727,
"mean": 0.0002734751324169338,
"std": 0.013588963076472282,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.3744518756866455,
"max": 1.1423423290252686,
"mean": 0.890155553817749,
"std": 0.0642639547586441,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.44847023487091064,
"max": 0.5443573594093323,
"mean": 2.4567927539465018e-05,
"std": 0.04556553065776825,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.2254226952791214,
"max": 0.08823559433221817,
"mean": -0.0320654921233654,
"std": 0.03788232430815697,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7300624251365662,
"max": 0.6936558485031128,
"mean": 3.439782449277118e-05,
"std": 0.05177776888012886,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.1755923330783844,
"max": 0.21977680921554565,
"mean": 4.2144907638430595e-05,
"std": 0.03183648735284805,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.3417545258998871,
"max": 0.3754495084285736,
"mean": 4.2937641410389915e-05,
"std": 0.03413964807987213,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.3177294135093689,
"max": 1.2977259159088135,
"mean": 0.6017159223556519,
"std": 0.08427947759628296,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.2838163673877716,
"max": 0.2612304091453552,
"mean": -2.8361523618514184e-06,
"std": 0.03598065674304962,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.23691536486148834,
"max": 0.20665380358695984,
"mean": 0.0002377421478740871,
"std": 0.05610164627432823,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.4367288649082184,
"max": 0.326652467250824,
"mean": 2.422912439214997e-05,
"std": 0.034131284803152084,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.582788944244385,
"max": 7.362354278564453,
"mean": -0.007508529350161552,
"std": 0.7035665512084961,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.34583720564842224,
"max": 0.3661332130432129,
"mean": 0.00010320795263396576,
"std": 0.04782785847783089,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.07427486777305603,
"max": 0.060801248997449875,
"mean": 0.0009337762021459639,
"std": 0.014963135123252869,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.25689101219177246,
"max": 0.28821247816085815,
"mean": 4.153083864366636e-06,
"std": 0.04155467450618744,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.05564720183610916,
"max": 0.0631924495100975,
"mean": 0.0001379186287522316,
"std": 0.007182796951383352,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.49357107281684875,
"max": 1.2338876724243164,
"mean": 1.0134950876235962,
"std": 0.11754289269447327,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.0940601825714111,
"max": 1.0474328994750977,
"mean": -4.88213227072265e-05,
"std": 0.05240841209888458,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.2248232364654541,
"max": 0.17388059198856354,
"mean": -0.02729785442352295,
"std": 0.036497559398412704,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8899852633476257,
"max": 0.9281743168830872,
"mean": -0.00014587071200367063,
"std": 0.05328153818845749,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.17224453389644623,
"max": 0.38245582580566406,
"mean": 0.0033820997923612595,
"std": 0.04001828283071518,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.7799473404884338,
"max": 0.7260819673538208,
"mean": 1.8725522750173695e-05,
"std": 0.046160738915205,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.33860552310943604,
"max": 1.442690134048462,
"mean": 0.9484557509422302,
"std": 0.20696218311786652,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.7459073066711426,
"max": 1.704575538635254,
"mean": 0.00022730980708729476,
"std": 0.15868498384952545,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.2076622247695923,
"max": 1.1073572635650635,
"mean": -0.00959145836532116,
"std": 0.20509476959705353,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.4218980371952057,
"max": 0.4278029203414917,
"mean": 6.46372718620114e-05,
"std": 0.048015668988227844,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.878219604492188,
"max": 19.671934127807617,
"mean": -0.24954606592655182,
"std": 4.8062262535095215,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.3252118229866028,
"max": 0.44012102484703064,
"mean": -1.1724467185558751e-05,
"std": 0.04616120085120201,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.03427257761359215,
"max": 0.03733307123184204,
"mean": 0.0006422841688618064,
"std": 0.012923721224069595,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.7051200270652771,
"max": 0.6666434407234192,
"mean": 4.353695476311259e-05,
"std": 0.0578814335167408,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.07273512333631516,
"max": 0.06799687445163727,
"mean": -0.0001354652486043051,
"std": 0.012961134314537048,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.3802323043346405,
"max": 1.392055869102478,
"mean": 1.0665756464004517,
"std": 0.2197023183107376,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.6175218224525452,
"max": 0.7191157341003418,
"mean": 0.00011173778329975903,
"std": 0.058020252734422684,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.22093473374843597,
"max": 0.22644445300102234,
"mean": 0.006260717287659645,
"std": 0.04986373335123062,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.6302544474601746,
"max": 0.8900287747383118,
"mean": 1.1643458492471837e-05,
"std": 0.023527663201093674,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.5102453231811523,
"max": 0.4771297872066498,
"mean": -0.0030403323471546173,
"std": 0.06969437003135681,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.5377517342567444,
"max": 1.1850762367248535,
"mean": 0.7829766273498535,
"std": 0.09934176504611969,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.26876378059387207,
"max": 0.21405881643295288,
"mean": -0.00022433605045080185,
"std": 0.053995925933122635,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23968708515167236,
"max": 0.014838683418929577,
"mean": -0.0440097339451313,
"std": 0.03449948504567146,
"sparsity": 0.0,
"shape": [
100
]
}
}
}