diff --git "a/model_analysis.json" "b/model_analysis.json" new file mode 100644--- /dev/null +++ "b/model_analysis.json" @@ -0,0 +1,4683 @@ +{ + "layer_types": { + "transformer": 391 + }, + "parameter_counts": { + "transformer.time_embed.time_mlp.0.weight": 262144, + "transformer.time_embed.time_mlp.0.bias": 1024, + "transformer.time_embed.time_mlp.2.weight": 1048576, + "transformer.time_embed.time_mlp.2.bias": 1024, + "transformer.text_embed.text_embed.weight": 254600, + "transformer.input_embed.proj.weight": 307200, + "transformer.input_embed.proj.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, + "transformer.layers.0.1.g": 1024, + "transformer.layers.0.2.to_q.weight": 1048576, + "transformer.layers.0.2.to_q.bias": 1024, + "transformer.layers.0.2.to_k.weight": 1048576, + "transformer.layers.0.2.to_k.bias": 1024, + "transformer.layers.0.2.to_v.weight": 1048576, + "transformer.layers.0.2.to_v.bias": 1024, + "transformer.layers.0.2.to_out.0.weight": 1048576, + "transformer.layers.0.2.to_out.0.bias": 1024, + "transformer.layers.0.3.g": 1024, + "transformer.layers.0.4.ff.0.0.weight": 4194304, + "transformer.layers.0.4.ff.0.0.bias": 4096, + "transformer.layers.0.4.ff.2.weight": 4194304, + "transformer.layers.0.4.ff.2.bias": 1024, + "transformer.layers.1.1.g": 1024, + "transformer.layers.1.2.to_q.weight": 1048576, + "transformer.layers.1.2.to_q.bias": 1024, + "transformer.layers.1.2.to_k.weight": 1048576, + "transformer.layers.1.2.to_k.bias": 1024, + "transformer.layers.1.2.to_v.weight": 1048576, + "transformer.layers.1.2.to_v.bias": 1024, + "transformer.layers.1.2.to_out.0.weight": 1048576, + "transformer.layers.1.2.to_out.0.bias": 1024, + "transformer.layers.1.3.g": 1024, + "transformer.layers.1.4.ff.0.0.weight": 4194304, + "transformer.layers.1.4.ff.0.0.bias": 4096, + "transformer.layers.1.4.ff.2.weight": 4194304, + "transformer.layers.1.4.ff.2.bias": 1024, + "transformer.layers.2.1.g": 1024, + "transformer.layers.2.2.to_q.weight": 1048576, + "transformer.layers.2.2.to_q.bias": 1024, + "transformer.layers.2.2.to_k.weight": 1048576, + "transformer.layers.2.2.to_k.bias": 1024, + "transformer.layers.2.2.to_v.weight": 1048576, + "transformer.layers.2.2.to_v.bias": 1024, + "transformer.layers.2.2.to_out.0.weight": 1048576, + "transformer.layers.2.2.to_out.0.bias": 1024, + "transformer.layers.2.3.g": 1024, + "transformer.layers.2.4.ff.0.0.weight": 4194304, + "transformer.layers.2.4.ff.0.0.bias": 4096, + "transformer.layers.2.4.ff.2.weight": 4194304, + "transformer.layers.2.4.ff.2.bias": 1024, + "transformer.layers.3.1.g": 1024, + "transformer.layers.3.2.to_q.weight": 1048576, + "transformer.layers.3.2.to_q.bias": 1024, + "transformer.layers.3.2.to_k.weight": 1048576, + "transformer.layers.3.2.to_k.bias": 1024, + "transformer.layers.3.2.to_v.weight": 1048576, + "transformer.layers.3.2.to_v.bias": 1024, + "transformer.layers.3.2.to_out.0.weight": 1048576, + "transformer.layers.3.2.to_out.0.bias": 1024, + "transformer.layers.3.3.g": 1024, + "transformer.layers.3.4.ff.0.0.weight": 4194304, + "transformer.layers.3.4.ff.0.0.bias": 4096, + "transformer.layers.3.4.ff.2.weight": 4194304, + "transformer.layers.3.4.ff.2.bias": 1024, + "transformer.layers.4.1.g": 1024, + "transformer.layers.4.2.to_q.weight": 1048576, + "transformer.layers.4.2.to_q.bias": 1024, + "transformer.layers.4.2.to_k.weight": 1048576, + "transformer.layers.4.2.to_k.bias": 1024, + "transformer.layers.4.2.to_v.weight": 1048576, + "transformer.layers.4.2.to_v.bias": 1024, + "transformer.layers.4.2.to_out.0.weight": 1048576, + "transformer.layers.4.2.to_out.0.bias": 1024, + "transformer.layers.4.3.g": 1024, + "transformer.layers.4.4.ff.0.0.weight": 4194304, + "transformer.layers.4.4.ff.0.0.bias": 4096, + "transformer.layers.4.4.ff.2.weight": 4194304, + "transformer.layers.4.4.ff.2.bias": 1024, + "transformer.layers.5.1.g": 1024, + "transformer.layers.5.2.to_q.weight": 1048576, + "transformer.layers.5.2.to_q.bias": 1024, + "transformer.layers.5.2.to_k.weight": 1048576, + "transformer.layers.5.2.to_k.bias": 1024, + "transformer.layers.5.2.to_v.weight": 1048576, + "transformer.layers.5.2.to_v.bias": 1024, + "transformer.layers.5.2.to_out.0.weight": 1048576, + "transformer.layers.5.2.to_out.0.bias": 1024, + "transformer.layers.5.3.g": 1024, + "transformer.layers.5.4.ff.0.0.weight": 4194304, + "transformer.layers.5.4.ff.0.0.bias": 4096, + "transformer.layers.5.4.ff.2.weight": 4194304, + "transformer.layers.5.4.ff.2.bias": 1024, + "transformer.layers.6.1.g": 1024, + "transformer.layers.6.2.to_q.weight": 1048576, + "transformer.layers.6.2.to_q.bias": 1024, + "transformer.layers.6.2.to_k.weight": 1048576, + "transformer.layers.6.2.to_k.bias": 1024, + "transformer.layers.6.2.to_v.weight": 1048576, + "transformer.layers.6.2.to_v.bias": 1024, + "transformer.layers.6.2.to_out.0.weight": 1048576, + "transformer.layers.6.2.to_out.0.bias": 1024, + "transformer.layers.6.3.g": 1024, + "transformer.layers.6.4.ff.0.0.weight": 4194304, + "transformer.layers.6.4.ff.0.0.bias": 4096, + "transformer.layers.6.4.ff.2.weight": 4194304, + "transformer.layers.6.4.ff.2.bias": 1024, + "transformer.layers.7.1.g": 1024, + "transformer.layers.7.2.to_q.weight": 1048576, + "transformer.layers.7.2.to_q.bias": 1024, + "transformer.layers.7.2.to_k.weight": 1048576, + "transformer.layers.7.2.to_k.bias": 1024, + "transformer.layers.7.2.to_v.weight": 1048576, + "transformer.layers.7.2.to_v.bias": 1024, + "transformer.layers.7.2.to_out.0.weight": 1048576, + "transformer.layers.7.2.to_out.0.bias": 1024, + "transformer.layers.7.3.g": 1024, + "transformer.layers.7.4.ff.0.0.weight": 4194304, + "transformer.layers.7.4.ff.0.0.bias": 4096, + "transformer.layers.7.4.ff.2.weight": 4194304, + "transformer.layers.7.4.ff.2.bias": 1024, + "transformer.layers.8.1.g": 1024, + "transformer.layers.8.2.to_q.weight": 1048576, + "transformer.layers.8.2.to_q.bias": 1024, + "transformer.layers.8.2.to_k.weight": 1048576, + "transformer.layers.8.2.to_k.bias": 1024, + "transformer.layers.8.2.to_v.weight": 1048576, + "transformer.layers.8.2.to_v.bias": 1024, + "transformer.layers.8.2.to_out.0.weight": 1048576, + "transformer.layers.8.2.to_out.0.bias": 1024, + "transformer.layers.8.3.g": 1024, + "transformer.layers.8.4.ff.0.0.weight": 4194304, + "transformer.layers.8.4.ff.0.0.bias": 4096, + "transformer.layers.8.4.ff.2.weight": 4194304, + "transformer.layers.8.4.ff.2.bias": 1024, + "transformer.layers.9.1.g": 1024, + "transformer.layers.9.2.to_q.weight": 1048576, + "transformer.layers.9.2.to_q.bias": 1024, + "transformer.layers.9.2.to_k.weight": 1048576, + "transformer.layers.9.2.to_k.bias": 1024, + "transformer.layers.9.2.to_v.weight": 1048576, + "transformer.layers.9.2.to_v.bias": 1024, + "transformer.layers.9.2.to_out.0.weight": 1048576, + "transformer.layers.9.2.to_out.0.bias": 1024, + "transformer.layers.9.3.g": 1024, + "transformer.layers.9.4.ff.0.0.weight": 4194304, + "transformer.layers.9.4.ff.0.0.bias": 4096, + "transformer.layers.9.4.ff.2.weight": 4194304, + "transformer.layers.9.4.ff.2.bias": 1024, + "transformer.layers.10.1.g": 1024, + "transformer.layers.10.2.to_q.weight": 1048576, + "transformer.layers.10.2.to_q.bias": 1024, + "transformer.layers.10.2.to_k.weight": 1048576, + "transformer.layers.10.2.to_k.bias": 1024, + "transformer.layers.10.2.to_v.weight": 1048576, + "transformer.layers.10.2.to_v.bias": 1024, + "transformer.layers.10.2.to_out.0.weight": 1048576, + "transformer.layers.10.2.to_out.0.bias": 1024, + "transformer.layers.10.3.g": 1024, + "transformer.layers.10.4.ff.0.0.weight": 4194304, + "transformer.layers.10.4.ff.0.0.bias": 4096, + "transformer.layers.10.4.ff.2.weight": 4194304, + "transformer.layers.10.4.ff.2.bias": 1024, + "transformer.layers.11.1.g": 1024, + "transformer.layers.11.2.to_q.weight": 1048576, + "transformer.layers.11.2.to_q.bias": 1024, + "transformer.layers.11.2.to_k.weight": 1048576, + "transformer.layers.11.2.to_k.bias": 1024, + "transformer.layers.11.2.to_v.weight": 1048576, + "transformer.layers.11.2.to_v.bias": 1024, + "transformer.layers.11.2.to_out.0.weight": 1048576, + "transformer.layers.11.2.to_out.0.bias": 1024, + "transformer.layers.11.3.g": 1024, + "transformer.layers.11.4.ff.0.0.weight": 4194304, + "transformer.layers.11.4.ff.0.0.bias": 4096, + "transformer.layers.11.4.ff.2.weight": 4194304, + "transformer.layers.11.4.ff.2.bias": 1024, + "transformer.layers.12.1.g": 1024, + "transformer.layers.12.2.to_q.weight": 1048576, + "transformer.layers.12.2.to_q.bias": 1024, + "transformer.layers.12.2.to_k.weight": 1048576, + "transformer.layers.12.2.to_k.bias": 1024, + "transformer.layers.12.2.to_v.weight": 1048576, + "transformer.layers.12.2.to_v.bias": 1024, + "transformer.layers.12.2.to_out.0.weight": 1048576, + "transformer.layers.12.2.to_out.0.bias": 1024, + "transformer.layers.12.3.g": 1024, + "transformer.layers.12.4.ff.0.0.weight": 4194304, + "transformer.layers.12.4.ff.0.0.bias": 4096, + "transformer.layers.12.4.ff.2.weight": 4194304, + "transformer.layers.12.4.ff.2.bias": 1024, + "transformer.layers.13.0.weight": 2097152, + "transformer.layers.13.1.g": 1024, + "transformer.layers.13.2.to_q.weight": 1048576, + "transformer.layers.13.2.to_q.bias": 1024, + "transformer.layers.13.2.to_k.weight": 1048576, + "transformer.layers.13.2.to_k.bias": 1024, + "transformer.layers.13.2.to_v.weight": 1048576, + "transformer.layers.13.2.to_v.bias": 1024, + "transformer.layers.13.2.to_out.0.weight": 1048576, + "transformer.layers.13.2.to_out.0.bias": 1024, + "transformer.layers.13.3.g": 1024, + "transformer.layers.13.4.ff.0.0.weight": 4194304, + "transformer.layers.13.4.ff.0.0.bias": 4096, + "transformer.layers.13.4.ff.2.weight": 4194304, + "transformer.layers.13.4.ff.2.bias": 1024, + "transformer.layers.14.0.weight": 2097152, + "transformer.layers.14.1.g": 1024, + "transformer.layers.14.2.to_q.weight": 1048576, + "transformer.layers.14.2.to_q.bias": 1024, + "transformer.layers.14.2.to_k.weight": 1048576, + "transformer.layers.14.2.to_k.bias": 1024, + "transformer.layers.14.2.to_v.weight": 1048576, + "transformer.layers.14.2.to_v.bias": 1024, + "transformer.layers.14.2.to_out.0.weight": 1048576, + "transformer.layers.14.2.to_out.0.bias": 1024, + "transformer.layers.14.3.g": 1024, + "transformer.layers.14.4.ff.0.0.weight": 4194304, + "transformer.layers.14.4.ff.0.0.bias": 4096, + "transformer.layers.14.4.ff.2.weight": 4194304, + "transformer.layers.14.4.ff.2.bias": 1024, + "transformer.layers.15.0.weight": 2097152, + "transformer.layers.15.1.g": 1024, + "transformer.layers.15.2.to_q.weight": 1048576, + "transformer.layers.15.2.to_q.bias": 1024, + "transformer.layers.15.2.to_k.weight": 1048576, + "transformer.layers.15.2.to_k.bias": 1024, + "transformer.layers.15.2.to_v.weight": 1048576, + "transformer.layers.15.2.to_v.bias": 1024, + "transformer.layers.15.2.to_out.0.weight": 1048576, + "transformer.layers.15.2.to_out.0.bias": 1024, + "transformer.layers.15.3.g": 1024, + "transformer.layers.15.4.ff.0.0.weight": 4194304, + "transformer.layers.15.4.ff.0.0.bias": 4096, + "transformer.layers.15.4.ff.2.weight": 4194304, + "transformer.layers.15.4.ff.2.bias": 1024, + "transformer.layers.16.0.weight": 2097152, + "transformer.layers.16.1.g": 1024, + "transformer.layers.16.2.to_q.weight": 1048576, + "transformer.layers.16.2.to_q.bias": 1024, + "transformer.layers.16.2.to_k.weight": 1048576, + "transformer.layers.16.2.to_k.bias": 1024, + "transformer.layers.16.2.to_v.weight": 1048576, + "transformer.layers.16.2.to_v.bias": 1024, + "transformer.layers.16.2.to_out.0.weight": 1048576, + "transformer.layers.16.2.to_out.0.bias": 1024, + "transformer.layers.16.3.g": 1024, + "transformer.layers.16.4.ff.0.0.weight": 4194304, + "transformer.layers.16.4.ff.0.0.bias": 4096, + "transformer.layers.16.4.ff.2.weight": 4194304, + "transformer.layers.16.4.ff.2.bias": 1024, + "transformer.layers.17.0.weight": 2097152, + "transformer.layers.17.1.g": 1024, + "transformer.layers.17.2.to_q.weight": 1048576, + "transformer.layers.17.2.to_q.bias": 1024, + "transformer.layers.17.2.to_k.weight": 1048576, + "transformer.layers.17.2.to_k.bias": 1024, + "transformer.layers.17.2.to_v.weight": 1048576, + "transformer.layers.17.2.to_v.bias": 1024, + "transformer.layers.17.2.to_out.0.weight": 1048576, + "transformer.layers.17.2.to_out.0.bias": 1024, + "transformer.layers.17.3.g": 1024, + "transformer.layers.17.4.ff.0.0.weight": 4194304, + "transformer.layers.17.4.ff.0.0.bias": 4096, + "transformer.layers.17.4.ff.2.weight": 4194304, + "transformer.layers.17.4.ff.2.bias": 1024, + "transformer.layers.18.0.weight": 2097152, + "transformer.layers.18.1.g": 1024, + "transformer.layers.18.2.to_q.weight": 1048576, + "transformer.layers.18.2.to_q.bias": 1024, + "transformer.layers.18.2.to_k.weight": 1048576, + "transformer.layers.18.2.to_k.bias": 1024, + "transformer.layers.18.2.to_v.weight": 1048576, + "transformer.layers.18.2.to_v.bias": 1024, + "transformer.layers.18.2.to_out.0.weight": 1048576, + "transformer.layers.18.2.to_out.0.bias": 1024, + "transformer.layers.18.3.g": 1024, + "transformer.layers.18.4.ff.0.0.weight": 4194304, + "transformer.layers.18.4.ff.0.0.bias": 4096, + "transformer.layers.18.4.ff.2.weight": 4194304, + "transformer.layers.18.4.ff.2.bias": 1024, + "transformer.layers.19.0.weight": 2097152, + "transformer.layers.19.1.g": 1024, + "transformer.layers.19.2.to_q.weight": 1048576, + "transformer.layers.19.2.to_q.bias": 1024, + "transformer.layers.19.2.to_k.weight": 1048576, + "transformer.layers.19.2.to_k.bias": 1024, + "transformer.layers.19.2.to_v.weight": 1048576, + "transformer.layers.19.2.to_v.bias": 1024, + "transformer.layers.19.2.to_out.0.weight": 1048576, + "transformer.layers.19.2.to_out.0.bias": 1024, + "transformer.layers.19.3.g": 1024, + "transformer.layers.19.4.ff.0.0.weight": 4194304, + "transformer.layers.19.4.ff.0.0.bias": 4096, + "transformer.layers.19.4.ff.2.weight": 4194304, + "transformer.layers.19.4.ff.2.bias": 1024, + "transformer.layers.20.0.weight": 2097152, + "transformer.layers.20.1.g": 1024, + "transformer.layers.20.2.to_q.weight": 1048576, + "transformer.layers.20.2.to_q.bias": 1024, + "transformer.layers.20.2.to_k.weight": 1048576, + "transformer.layers.20.2.to_k.bias": 1024, + "transformer.layers.20.2.to_v.weight": 1048576, + "transformer.layers.20.2.to_v.bias": 1024, + "transformer.layers.20.2.to_out.0.weight": 1048576, + "transformer.layers.20.2.to_out.0.bias": 1024, + "transformer.layers.20.3.g": 1024, + "transformer.layers.20.4.ff.0.0.weight": 4194304, + "transformer.layers.20.4.ff.0.0.bias": 4096, + "transformer.layers.20.4.ff.2.weight": 4194304, + "transformer.layers.20.4.ff.2.bias": 1024, + "transformer.layers.21.0.weight": 2097152, + "transformer.layers.21.1.g": 1024, + "transformer.layers.21.2.to_q.weight": 1048576, + "transformer.layers.21.2.to_q.bias": 1024, + "transformer.layers.21.2.to_k.weight": 1048576, + "transformer.layers.21.2.to_k.bias": 1024, + "transformer.layers.21.2.to_v.weight": 1048576, + "transformer.layers.21.2.to_v.bias": 1024, + "transformer.layers.21.2.to_out.0.weight": 1048576, + "transformer.layers.21.2.to_out.0.bias": 1024, + "transformer.layers.21.3.g": 1024, + "transformer.layers.21.4.ff.0.0.weight": 4194304, + "transformer.layers.21.4.ff.0.0.bias": 4096, + "transformer.layers.21.4.ff.2.weight": 4194304, + "transformer.layers.21.4.ff.2.bias": 1024, + "transformer.layers.22.0.weight": 2097152, + "transformer.layers.22.1.g": 1024, + "transformer.layers.22.2.to_q.weight": 1048576, + "transformer.layers.22.2.to_q.bias": 1024, + "transformer.layers.22.2.to_k.weight": 1048576, + "transformer.layers.22.2.to_k.bias": 1024, + "transformer.layers.22.2.to_v.weight": 1048576, + "transformer.layers.22.2.to_v.bias": 1024, + "transformer.layers.22.2.to_out.0.weight": 1048576, + "transformer.layers.22.2.to_out.0.bias": 1024, + "transformer.layers.22.3.g": 1024, + "transformer.layers.22.4.ff.0.0.weight": 4194304, + "transformer.layers.22.4.ff.0.0.bias": 4096, + "transformer.layers.22.4.ff.2.weight": 4194304, + "transformer.layers.22.4.ff.2.bias": 1024, + "transformer.layers.23.0.weight": 2097152, + "transformer.layers.23.1.g": 1024, + "transformer.layers.23.2.to_q.weight": 1048576, + "transformer.layers.23.2.to_q.bias": 1024, + "transformer.layers.23.2.to_k.weight": 1048576, + "transformer.layers.23.2.to_k.bias": 1024, + "transformer.layers.23.2.to_v.weight": 1048576, + "transformer.layers.23.2.to_v.bias": 1024, + "transformer.layers.23.2.to_out.0.weight": 1048576, + "transformer.layers.23.2.to_out.0.bias": 1024, + "transformer.layers.23.3.g": 1024, + "transformer.layers.23.4.ff.0.0.weight": 4194304, + "transformer.layers.23.4.ff.0.0.bias": 4096, + "transformer.layers.23.4.ff.2.weight": 4194304, + "transformer.layers.23.4.ff.2.bias": 1024, + "transformer.layers.24.0.weight": 2097152, + "transformer.layers.24.1.g": 1024, + "transformer.layers.24.2.to_q.weight": 1048576, + "transformer.layers.24.2.to_q.bias": 1024, + "transformer.layers.24.2.to_k.weight": 1048576, + "transformer.layers.24.2.to_k.bias": 1024, + "transformer.layers.24.2.to_v.weight": 1048576, + "transformer.layers.24.2.to_v.bias": 1024, + "transformer.layers.24.2.to_out.0.weight": 1048576, + "transformer.layers.24.2.to_out.0.bias": 1024, + "transformer.layers.24.3.g": 1024, + "transformer.layers.24.4.ff.0.0.weight": 4194304, + "transformer.layers.24.4.ff.0.0.bias": 4096, + "transformer.layers.24.4.ff.2.weight": 4194304, + "transformer.layers.24.4.ff.2.bias": 1024, + "transformer.layers.25.0.weight": 2097152, + "transformer.layers.25.1.g": 1024, + "transformer.layers.25.2.to_q.weight": 1048576, + "transformer.layers.25.2.to_q.bias": 1024, + "transformer.layers.25.2.to_k.weight": 1048576, + "transformer.layers.25.2.to_k.bias": 1024, + "transformer.layers.25.2.to_v.weight": 1048576, + "transformer.layers.25.2.to_v.bias": 1024, + "transformer.layers.25.2.to_out.0.weight": 1048576, + "transformer.layers.25.2.to_out.0.bias": 1024, + "transformer.layers.25.3.g": 1024, + "transformer.layers.25.4.ff.0.0.weight": 4194304, + "transformer.layers.25.4.ff.0.0.bias": 4096, + "transformer.layers.25.4.ff.2.weight": 4194304, + "transformer.layers.25.4.ff.2.bias": 1024, + "transformer.norm_out.g": 1024, + "transformer.proj_out.weight": 102400, + "transformer.proj_out.bias": 100 + }, + "important_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ], + "bottleneck_layers": [], + "recommendations": { + "focus_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ] + }, + "total_parameters": 391, + "total_elements": 360755948, + "param_ranges": { + "transformer.time_embed.time_mlp.0.weight": { + "min": -0.43058744072914124, + "max": 0.29903075098991394, + "mean": -0.0025567002594470978, + "std": 0.04255249723792076, + "sparsity": 0.0, + "shape": [ + 1024, + 256 + ] + }, + "transformer.time_embed.time_mlp.0.bias": { + "min": -0.06321248412132263, + "max": 0.107655830681324, + "mean": 0.0005928671453148127, + "std": 0.03411800414323807, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.weight": { + "min": -0.4126332402229309, + "max": 0.8362816572189331, + "mean": -0.00021067322813905776, + "std": 0.024107061326503754, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.bias": { + "min": -0.11544923484325409, + "max": 0.3215144872665405, + "mean": -0.0009406265453435481, + "std": 0.01957659050822258, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.text_embed.text_embed.weight": { + "min": -2.791715383529663, + "max": 2.870434045791626, + "mean": -0.0003647833364084363, + "std": 0.6153609752655029, + "sparsity": 0.0, + "shape": [ + 2546, + 100 + ] + }, + "transformer.input_embed.proj.weight": { + "min": -0.27896371483802795, + "max": 0.3819044828414917, + "mean": 0.0004220041155349463, + "std": 0.04275014251470566, + "sparsity": 0.0, + "shape": [ + 1024, + 300 + ] + }, + "transformer.input_embed.proj.bias": { + "min": -0.22224494814872742, + "max": 0.20959755778312683, + "mean": -0.004497884772717953, + "std": 0.040913522243499756, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { + "min": -0.4279385209083557, + "max": 0.4752762019634247, + "mean": 2.009033551075845e-06, + "std": 0.024508582428097725, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { + "min": -0.32550832629203796, + "max": 0.1569339483976364, + "mean": -0.046702392399311066, + "std": 0.0515773706138134, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { + "min": -0.4104919135570526, + "max": 0.3544883131980896, + "mean": -0.00012644486560020596, + "std": 0.02360026352107525, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { + "min": -0.229718416929245, + "max": 0.26262396574020386, + "mean": -0.02914787270128727, + "std": 0.04934746399521828, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.1.g": { + "min": 0.2545970380306244, + "max": 0.8200467824935913, + "mean": 0.5254305601119995, + "std": 0.08080543577671051, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_q.weight": { + "min": -0.29690292477607727, + "max": 0.26533740758895874, + "mean": -0.00042425302672199905, + "std": 0.0321030355989933, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_q.bias": { + "min": -0.09272623807191849, + "max": 0.12487658858299255, + "mean": 0.0006494724657386541, + "std": 0.025737110525369644, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_k.weight": { + "min": -0.29031693935394287, + "max": 0.2813326120376587, + "mean": -7.68666504882276e-05, + "std": 0.03093528188765049, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_k.bias": { + "min": -5.899355888366699, + "max": 5.814132213592529, + "mean": -0.00933213159441948, + "std": 1.29543137550354, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_v.weight": { + "min": -0.42477670311927795, + "max": 0.3437301814556122, + "mean": 9.746497380547225e-05, + "std": 0.029952634125947952, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_v.bias": { + "min": -0.028919341042637825, + "max": 0.027677638456225395, + "mean": -0.00031004834454506636, + "std": 0.012572667561471462, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.weight": { + "min": -0.4539007246494293, + "max": 0.4487650692462921, + "mean": 2.293557918164879e-05, + "std": 0.023855043575167656, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.bias": { + "min": -0.08868313580751419, + "max": 0.09119853377342224, + "mean": 0.0022740147542208433, + "std": 0.019512386992573738, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.3.g": { + "min": 0.2666647434234619, + "max": 1.0563400983810425, + "mean": 0.5311195850372314, + "std": 0.10441721975803375, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.weight": { + "min": -0.5746223330497742, + "max": 0.6085677742958069, + "mean": -0.0004311846860218793, + "std": 0.038594383746385574, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.bias": { + "min": -0.18177427351474762, + "max": 0.04579279571771622, + "mean": -0.029445737600326538, + "std": 0.04258440434932709, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.0.4.ff.2.weight": { + "min": -1.1666346788406372, + "max": 1.6346005201339722, + "mean": 0.0003186643880326301, + "std": 0.027693353593349457, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.0.4.ff.2.bias": { + "min": -0.16253599524497986, + "max": 0.20575034618377686, + "mean": -0.02111678197979927, + "std": 0.027937985956668854, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.1.g": { + "min": 0.22444167733192444, + "max": 0.8436422944068909, + "mean": 0.4875181317329407, + "std": 0.07519698888063431, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_q.weight": { + "min": -0.25531357526779175, + "max": 0.3059065341949463, + "mean": -9.770956239663064e-06, + "std": 0.03346950560808182, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_q.bias": { + "min": -0.0954207256436348, + "max": 0.11047575622797012, + "mean": 5.4158546845428646e-05, + "std": 0.026984980329871178, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_k.weight": { + "min": -0.2974885404109955, + "max": 0.29604607820510864, + "mean": 5.041498661739752e-05, + "std": 0.03253797069191933, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_k.bias": { + "min": -5.164185523986816, + "max": 5.084409236907959, + "mean": -0.014593909494578838, + "std": 1.1573563814163208, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_v.weight": { + "min": -0.34487831592559814, + "max": 0.34348052740097046, + "mean": 7.885653030825779e-05, + "std": 0.030057402327656746, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_v.bias": { + "min": -0.03615832328796387, + "max": 0.03314381092786789, + "mean": -0.00014287084923125803, + "std": 0.01301794033497572, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.weight": { + "min": -0.31527891755104065, + "max": 0.3751768469810486, + "mean": -2.1734818801633082e-05, + "std": 0.02405463345348835, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.bias": { + "min": -0.10528924316167831, + "max": 0.12185486406087875, + "mean": -0.0019566768314689398, + "std": 0.028841182589530945, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.3.g": { + "min": 0.3117589056491852, + "max": 1.1208702325820923, + "mean": 0.6662365198135376, + "std": 0.09775208681821823, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.weight": { + "min": -0.872468888759613, + "max": 0.6275586485862732, + "mean": 0.0016758753918111324, + "std": 0.047438040375709534, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.bias": { + "min": -0.2710355520248413, + "max": 0.03406016156077385, + "mean": -0.04659765958786011, + "std": 0.04059656709432602, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.1.4.ff.2.weight": { + "min": -0.9201626181602478, + "max": 0.9643434882164001, + "mean": 0.0010215931106358767, + "std": 0.04070163145661354, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.1.4.ff.2.bias": { + "min": -0.14462199807167053, + "max": 0.07486966252326965, + "mean": -0.009085646830499172, + "std": 0.02570141665637493, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.1.g": { + "min": 0.23963269591331482, + "max": 0.7123461365699768, + "mean": 0.4472006559371948, + "std": 0.05932367965579033, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_q.weight": { + "min": -0.2729354500770569, + "max": 0.29745981097221375, + "mean": 8.72666532814037e-06, + "std": 0.03547453135251999, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_q.bias": { + "min": -0.11902111023664474, + "max": 0.1184910237789154, + "mean": 0.0007516429759562016, + "std": 0.02761562168598175, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_k.weight": { + "min": -0.28102290630340576, + "max": 0.27947571873664856, + "mean": -7.658830872969702e-05, + "std": 0.03510264679789543, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_k.bias": { + "min": -2.509542465209961, + "max": 2.521538496017456, + "mean": 0.026744995266199112, + "std": 0.5867680311203003, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_v.weight": { + "min": -0.2209818959236145, + "max": 0.2715614438056946, + "mean": 2.5364215616718866e-06, + "std": 0.0307310800999403, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_v.bias": { + "min": -0.03315867856144905, + "max": 0.0312359519302845, + "mean": 0.00011449654994066805, + "std": 0.012396099045872688, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.weight": { + "min": -0.23535357415676117, + "max": 0.23171932995319366, + "mean": 5.724863876821473e-05, + "std": 0.025697464123368263, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.bias": { + "min": -0.13585864007472992, + "max": 0.12803053855895996, + "mean": -0.0054976665414869785, + "std": 0.039962489157915115, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.3.g": { + "min": 0.3546965718269348, + "max": 1.1723699569702148, + "mean": 0.7105212211608887, + "std": 0.10377959161996841, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.weight": { + "min": -0.6174826622009277, + "max": 0.5556296706199646, + "mean": 0.001160400453954935, + "std": 0.04611344262957573, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.bias": { + "min": -0.18955032527446747, + "max": 0.024929288774728775, + "mean": -0.03484814986586571, + "std": 0.02862328663468361, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.2.4.ff.2.weight": { + "min": -1.130905032157898, + "max": 0.970402181148529, + "mean": 0.00035809652763418853, + "std": 0.04234178364276886, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.2.4.ff.2.bias": { + "min": -0.5977792143821716, + "max": 0.06286704540252686, + "mean": -0.004878203850239515, + "std": 0.028615841642022133, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.1.g": { + "min": 0.3753129839897156, + "max": 0.9404288530349731, + "mean": 0.5924519896507263, + "std": 0.06695062667131424, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_q.weight": { + "min": -0.3918393850326538, + "max": 0.3694100081920624, + "mean": 7.003510108916089e-05, + "std": 0.03718580678105354, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_q.bias": { + "min": -0.11892382800579071, + "max": 0.1364460289478302, + "mean": 0.0009139248286373913, + "std": 0.02918536402285099, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_k.weight": { + "min": -0.6189467310905457, + "max": 0.5086581707000732, + "mean": 1.522459842817625e-05, + "std": 0.036438774317502975, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_k.bias": { + "min": -8.18658447265625, + "max": 8.788694381713867, + "mean": -0.10927355289459229, + "std": 1.6988238096237183, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_v.weight": { + "min": -0.27650272846221924, + "max": 0.2397344559431076, + "mean": 5.2208531997166574e-05, + "std": 0.03261270374059677, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_v.bias": { + "min": -0.051591187715530396, + "max": 0.039499007165431976, + "mean": 9.101108298636973e-05, + "std": 0.01296647172421217, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.weight": { + "min": -0.2308182418346405, + "max": 0.23492185771465302, + "mean": -2.198125366703607e-05, + "std": 0.0293892789632082, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.bias": { + "min": -0.20422494411468506, + "max": 0.10520327836275101, + "mean": -0.004020952619612217, + "std": 0.032637566328048706, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.3.g": { + "min": 0.3395068645477295, + "max": 1.0124397277832031, + "mean": 0.7006875872612, + "std": 0.09675538539886475, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.weight": { + "min": -0.5645881295204163, + "max": 0.8335761427879333, + "mean": 0.00041510065784677863, + "std": 0.04229363799095154, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.bias": { + "min": -0.2121758759021759, + "max": 0.0300263874232769, + "mean": -0.032174285501241684, + "std": 0.026499440893530846, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.3.4.ff.2.weight": { + "min": -0.7549118995666504, + "max": 0.7191137671470642, + "mean": -1.6272973880404606e-05, + "std": 0.03683432564139366, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.3.4.ff.2.bias": { + "min": -0.2633835971355438, + "max": 0.10630631446838379, + "mean": -0.00301279011182487, + "std": 0.028871648013591766, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.1.g": { + "min": 0.2839854061603546, + "max": 0.695024311542511, + "mean": 0.49937066435813904, + "std": 0.04653334617614746, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_q.weight": { + "min": -0.2781727910041809, + "max": 0.23389220237731934, + "mean": -0.00011100011033704504, + "std": 0.0387568399310112, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_q.bias": { + "min": -0.15358875691890717, + "max": 0.12641564011573792, + "mean": -0.0022295925300568342, + "std": 0.03333538770675659, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_k.weight": { + "min": -0.41443270444869995, + "max": 0.6594027280807495, + "mean": -1.858997711678967e-05, + "std": 0.03909648209810257, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_k.bias": { + "min": -4.237802028656006, + "max": 4.722365379333496, + "mean": -0.020456280559301376, + "std": 1.0076717138290405, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_v.weight": { + "min": -0.24511729180812836, + "max": 0.20752397179603577, + "mean": 4.432153218658641e-05, + "std": 0.03396220877766609, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_v.bias": { + "min": -0.03445148468017578, + "max": 0.044871583580970764, + "mean": -1.9065962987951934e-05, + "std": 0.012637496925890446, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.weight": { + "min": -0.20115934312343597, + "max": 0.20639759302139282, + "mean": -2.9241522497613914e-05, + "std": 0.031020423397421837, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.bias": { + "min": -0.19977232813835144, + "max": 0.1132478341460228, + "mean": -0.002891883021220565, + "std": 0.03452973812818527, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.3.g": { + "min": 0.3667006194591522, + "max": 1.0575865507125854, + "mean": 0.6704831123352051, + "std": 0.06640235334634781, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.weight": { + "min": -0.39832764863967896, + "max": 0.5020085573196411, + "mean": -3.8792531995568424e-05, + "std": 0.041129473596811295, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.bias": { + "min": -0.12865175306797028, + "max": 0.02696564421057701, + "mean": -0.030531559139490128, + "std": 0.021883869543671608, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.4.4.ff.2.weight": { + "min": -0.44955554604530334, + "max": 0.4331819415092468, + "mean": 7.46890582377091e-05, + "std": 0.034889888018369675, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.4.4.ff.2.bias": { + "min": -0.26744911074638367, + "max": 0.07309805601835251, + "mean": -0.0010887861717492342, + "std": 0.023132896050810814, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.1.g": { + "min": 0.28746652603149414, + "max": 0.6852710843086243, + "mean": 0.5245163440704346, + "std": 0.04753531143069267, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_q.weight": { + "min": -0.2225414365530014, + "max": 0.2233862727880478, + "mean": 1.5953022739267908e-05, + "std": 0.038948602974414825, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_q.bias": { + "min": -0.13633988797664642, + "max": 0.10930000245571136, + "mean": 0.00024919791030697525, + "std": 0.029206812381744385, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_k.weight": { + "min": -0.3749636113643646, + "max": 0.43756094574928284, + "mean": -9.44960629567504e-06, + "std": 0.03928674757480621, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_k.bias": { + "min": -3.845799684524536, + "max": 4.999211311340332, + "mean": 0.009741385467350483, + "std": 0.8452029228210449, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_v.weight": { + "min": -0.22279420495033264, + "max": 0.22023756802082062, + "mean": -3.8509870137204416e-07, + "std": 0.03440963104367256, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_v.bias": { + "min": -0.04381667822599411, + "max": 0.03586551547050476, + "mean": -0.0002609736402519047, + "std": 0.012077639810740948, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.weight": { + "min": -0.21273735165596008, + "max": 0.18841038644313812, + "mean": -1.714246354822535e-05, + "std": 0.031536102294921875, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.bias": { + "min": -0.18087971210479736, + "max": 0.12077755481004715, + "mean": -0.0023926026187837124, + "std": 0.04127210006117821, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.3.g": { + "min": 0.4229143261909485, + "max": 0.941786527633667, + "mean": 0.6626389026641846, + "std": 0.056811243295669556, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.weight": { + "min": -0.37079188227653503, + "max": 0.47652140259742737, + "mean": -8.189280197257176e-05, + "std": 0.040888600051403046, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.bias": { + "min": -0.20858491957187653, + "max": 0.027342500165104866, + "mean": -0.03023093193769455, + "std": 0.021366029977798462, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.5.4.ff.2.weight": { + "min": -0.3407646119594574, + "max": 0.7343085408210754, + "mean": 8.227993384934962e-05, + "std": 0.03476560488343239, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.5.4.ff.2.bias": { + "min": -0.2401275634765625, + "max": 0.05064300820231438, + "mean": -0.0011859382502734661, + "std": 0.020460018888115883, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.1.g": { + "min": 0.3059234321117401, + "max": 0.6536071300506592, + "mean": 0.5251041054725647, + "std": 0.046117961406707764, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_q.weight": { + "min": -0.30434539914131165, + "max": 0.21718497574329376, + "mean": 6.997769378358498e-05, + "std": 0.03949679434299469, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_q.bias": { + "min": -0.1491607427597046, + "max": 0.1309996247291565, + "mean": 0.00032534098136238754, + "std": 0.030453510582447052, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_k.weight": { + "min": -0.25696200132369995, + "max": 0.20183700323104858, + "mean": 3.1303323339670897e-05, + "std": 0.0394880436360836, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_k.bias": { + "min": -2.3362133502960205, + "max": 2.3758370876312256, + "mean": -0.026241015642881393, + "std": 0.4497620761394501, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_v.weight": { + "min": -0.1885133534669876, + "max": 0.21026504039764404, + "mean": 3.72500107914675e-05, + "std": 0.03479313850402832, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_v.bias": { + "min": -0.03166966885328293, + "max": 0.035711731761693954, + "mean": -0.00019632275507319719, + "std": 0.012291603721678257, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.weight": { + "min": -0.18826794624328613, + "max": 0.17029285430908203, + "mean": -6.840371497673914e-05, + "std": 0.03216983750462532, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.bias": { + "min": -0.13950176537036896, + "max": 0.13710856437683105, + "mean": -0.002513276878744364, + "std": 0.05129357427358627, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.3.g": { + "min": 0.46702930331230164, + "max": 0.9555635452270508, + "mean": 0.6688482761383057, + "std": 0.05276886373758316, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.weight": { + "min": -0.3244642913341522, + "max": 0.30925843119621277, + "mean": -9.10853486857377e-07, + "std": 0.04094461724162102, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.bias": { + "min": -0.12482384592294693, + "max": 0.02569793164730072, + "mean": -0.03068721666932106, + "std": 0.019822420552372932, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.6.4.ff.2.weight": { + "min": -0.43951860070228577, + "max": 0.4452158510684967, + "mean": 9.512923134025186e-05, + "std": 0.03511851280927658, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.6.4.ff.2.bias": { + "min": -0.22458022832870483, + "max": 0.051897209137678146, + "mean": -0.0011794487945735455, + "std": 0.018467247486114502, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.1.g": { + "min": 0.3391944468021393, + "max": 0.7399035096168518, + "mean": 0.558688759803772, + "std": 0.04139659181237221, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_q.weight": { + "min": -0.27298545837402344, + "max": 0.2789517045021057, + "mean": 2.041603875113651e-05, + "std": 0.041056908667087555, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_q.bias": { + "min": -0.13676847517490387, + "max": 0.1398179680109024, + "mean": 0.0004908779519610107, + "std": 0.026629263535141945, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_k.weight": { + "min": -0.49038437008857727, + "max": 0.35562369227409363, + "mean": 8.908439485821873e-05, + "std": 0.04069468006491661, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_k.bias": { + "min": -2.297020673751831, + "max": 1.7451350688934326, + "mean": -0.02108073979616165, + "std": 0.5001184940338135, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_v.weight": { + "min": -0.2181541919708252, + "max": 0.19748014211654663, + "mean": -4.031343632959761e-05, + "std": 0.034232787787914276, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_v.bias": { + "min": -0.0411330908536911, + "max": 0.03885316848754883, + "mean": -0.00013403715274762362, + "std": 0.012882057577371597, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.weight": { + "min": -0.17773869633674622, + "max": 0.18285222351551056, + "mean": 4.8017449444159865e-05, + "std": 0.03155619651079178, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.bias": { + "min": -0.18002314865589142, + "max": 0.18396146595478058, + "mean": -0.0022139688953757286, + "std": 0.05483314022421837, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.3.g": { + "min": 0.474223792552948, + "max": 1.025842308998108, + "mean": 0.6452140212059021, + "std": 0.05035461485385895, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.weight": { + "min": -0.2715917229652405, + "max": 0.30928391218185425, + "mean": 0.00011250950046814978, + "std": 0.04068081080913544, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.bias": { + "min": -0.10539427399635315, + "max": 0.026698507368564606, + "mean": -0.02951802872121334, + "std": 0.017934730276465416, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.7.4.ff.2.weight": { + "min": -0.3393958806991577, + "max": 0.3293214440345764, + "mean": 5.262523700366728e-05, + "std": 0.03441222757101059, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.7.4.ff.2.bias": { + "min": -0.18173128366470337, + "max": 0.04261557012796402, + "mean": -0.001059417612850666, + "std": 0.017207711935043335, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.1.g": { + "min": 0.32517319917678833, + "max": 0.6865989565849304, + "mean": 0.5111718773841858, + "std": 0.03694766014814377, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_q.weight": { + "min": -0.2340274453163147, + "max": 0.22541004419326782, + "mean": -3.624596502049826e-05, + "std": 0.039175089448690414, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_q.bias": { + "min": -0.11520740389823914, + "max": 0.1319286823272705, + "mean": 0.00015029977657832205, + "std": 0.029165174812078476, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_k.weight": { + "min": -0.3522850573062897, + "max": 0.28482842445373535, + "mean": 6.6099587456847075e-06, + "std": 0.03924406319856644, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_k.bias": { + "min": -4.132234573364258, + "max": 3.5437166690826416, + "mean": -0.011590443551540375, + "std": 0.6826013326644897, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_v.weight": { + "min": -0.21073928475379944, + "max": 0.20945559442043304, + "mean": 3.4624928957782686e-05, + "std": 0.03448405861854553, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_v.bias": { + "min": -0.035892292857170105, + "max": 0.0479779876768589, + "mean": 0.0007904525264166296, + "std": 0.012872384861111641, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.weight": { + "min": -0.21030081808567047, + "max": 0.19305069744586945, + "mean": -9.318873708252795e-07, + "std": 0.03169514983892441, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.bias": { + "min": -0.18656854331493378, + "max": 0.17726241052150726, + "mean": -0.002840438624843955, + "std": 0.0586128756403923, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.3.g": { + "min": 0.4746079444885254, + "max": 1.041317105293274, + "mean": 0.6513123512268066, + "std": 0.04965612292289734, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.weight": { + "min": -0.24824990332126617, + "max": 0.32916077971458435, + "mean": 0.0001809034583857283, + "std": 0.04056909307837486, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.bias": { + "min": -0.1252717822790146, + "max": 0.024853328242897987, + "mean": -0.03049679473042488, + "std": 0.01761467382311821, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.8.4.ff.2.weight": { + "min": -0.4204847514629364, + "max": 0.4814334511756897, + "mean": 1.0858502719202079e-06, + "std": 0.03539634868502617, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.8.4.ff.2.bias": { + "min": -0.1512894481420517, + "max": 0.0435330905020237, + "mean": 4.2967651097569615e-05, + "std": 0.014878639951348305, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.1.g": { + "min": 0.31564587354660034, + "max": 0.6816184520721436, + "mean": 0.5528937578201294, + "std": 0.04068783298134804, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_q.weight": { + "min": -0.20636627078056335, + "max": 0.2197655737400055, + "mean": 3.1909676181385294e-05, + "std": 0.038298994302749634, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_q.bias": { + "min": -0.13777659833431244, + "max": 0.11261031776666641, + "mean": 2.2643122065346688e-05, + "std": 0.025812044739723206, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_k.weight": { + "min": -0.40279680490493774, + "max": 0.3708725571632385, + "mean": 2.5475083020864986e-05, + "std": 0.03817913681268692, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_k.bias": { + "min": -3.770826816558838, + "max": 2.8686459064483643, + "mean": 0.001154756173491478, + "std": 0.5168185234069824, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_v.weight": { + "min": -0.20366711914539337, + "max": 0.1976872831583023, + "mean": 2.9746484869974665e-05, + "std": 0.03429698571562767, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_v.bias": { + "min": -0.050587497651576996, + "max": 0.039878759533166885, + "mean": -0.00042467116145417094, + "std": 0.013416356407105923, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.weight": { + "min": -0.19594806432724, + "max": 0.20180270075798035, + "mean": -1.2511954992078245e-05, + "std": 0.031805265694856644, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.bias": { + "min": -0.1929892897605896, + "max": 0.19512949883937836, + "mean": -0.002963980659842491, + "std": 0.06252874433994293, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.3.g": { + "min": 0.3488827645778656, + "max": 1.0837209224700928, + "mean": 0.6670882701873779, + "std": 0.05524449050426483, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.weight": { + "min": -0.22563330829143524, + "max": 0.25133612751960754, + "mean": 0.00035861917422153056, + "std": 0.040758710354566574, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.bias": { + "min": -0.09100860357284546, + "max": 0.04368036612868309, + "mean": -0.03007863275706768, + "std": 0.01761433854699135, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.9.4.ff.2.weight": { + "min": -0.35325002670288086, + "max": 0.3038857877254486, + "mean": -4.542069655144587e-05, + "std": 0.037121765315532684, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.9.4.ff.2.bias": { + "min": -0.16173334419727325, + "max": 0.06341976672410965, + "mean": -7.59128452045843e-05, + "std": 0.019423963502049446, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.1.g": { + "min": 0.348746657371521, + "max": 0.7219499945640564, + "mean": 0.5423322916030884, + "std": 0.03906194120645523, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_q.weight": { + "min": -0.21932680904865265, + "max": 0.22335435450077057, + "mean": -1.1452927537902724e-05, + "std": 0.03923005238175392, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_q.bias": { + "min": -0.11840008199214935, + "max": 0.1704910695552826, + "mean": 0.00028676993679255247, + "std": 0.025109266862273216, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_k.weight": { + "min": -0.24656711518764496, + "max": 0.30068346858024597, + "mean": -3.68916334991809e-05, + "std": 0.03892939165234566, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_k.bias": { + "min": -3.504953145980835, + "max": 3.7143990993499756, + "mean": 0.015847017988562584, + "std": 0.7823704481124878, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_v.weight": { + "min": -0.21910522878170013, + "max": 0.23737633228302002, + "mean": -1.3034959920332767e-05, + "std": 0.036302801221609116, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_v.bias": { + "min": -0.04721483215689659, + "max": 0.051370855420827866, + "mean": 0.00048040057299658656, + "std": 0.013522167690098286, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.weight": { + "min": -0.2142011672258377, + "max": 0.21717870235443115, + "mean": 5.644252087222412e-05, + "std": 0.03361529856920242, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.bias": { + "min": -0.21134759485721588, + "max": 0.23112934827804565, + "mean": -0.005099965259432793, + "std": 0.061861325055360794, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.3.g": { + "min": 0.361937016248703, + "max": 1.1009857654571533, + "mean": 0.6992422342300415, + "std": 0.053594909608364105, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.weight": { + "min": -0.2350708544254303, + "max": 0.24471336603164673, + "mean": 0.00046341665438376367, + "std": 0.041268061846494675, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.bias": { + "min": -0.0980960875749588, + "max": 0.06807035952806473, + "mean": -0.03142966330051422, + "std": 0.018127702176570892, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.10.4.ff.2.weight": { + "min": -0.30174583196640015, + "max": 0.3516803979873657, + "mean": -8.28510383144021e-05, + "std": 0.04027377441525459, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.10.4.ff.2.bias": { + "min": -0.1523003727197647, + "max": 0.1496732383966446, + "mean": 0.00026386568788439035, + "std": 0.023037536069750786, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.1.g": { + "min": 0.9992449879646301, + "max": 1.001513123512268, + "mean": 1.0000585317611694, + "std": 0.0006324834539555013, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_q.weight": { + "min": -0.031258270144462585, + "max": 0.031254518777132034, + "mean": -1.929036807268858e-05, + "std": 0.018040649592876434, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_q.bias": { + "min": -0.03122791275382042, + "max": 0.030987516045570374, + "mean": -0.0010841463226824999, + "std": 0.01795026659965515, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_k.weight": { + "min": -0.03125518560409546, + "max": 0.0312589630484581, + "mean": 3.5481098166201264e-06, + "std": 0.018041057512164116, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_k.bias": { + "min": -0.031153831630945206, + "max": 0.03117419220507145, + "mean": 0.00033391290344297886, + "std": 0.018062464892864227, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.bias": { + "min": -0.0006552772247232497, + "max": 0.0007129037985578179, + "mean": 5.131376383360475e-06, + "std": 0.0001946619595400989, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.3.g": { + "min": 0.997419536113739, + "max": 1.0028407573699951, + "mean": 0.9999656081199646, + "std": 0.000851841235999018, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.weight": { + "min": -0.03356073051691055, + "max": 0.03384723141789436, + "mean": -5.6891162785177585e-06, + "std": 0.018047483637928963, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.bias": { + "min": -0.03327289596199989, + "max": 0.03337877616286278, + "mean": -0.00020134463557042181, + "std": 0.017954064533114433, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.11.4.ff.2.weight": { + "min": -0.001495416508987546, + "max": 0.0016743302112445235, + "mean": 2.175480403820984e-06, + "std": 0.00029829132836312056, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.11.4.ff.2.bias": { + "min": -0.0005666155484504998, + "max": 0.0007540585356764495, + "mean": 8.17788895801641e-06, + "std": 0.00017612945521250367, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.1.g": { + "min": 0.3832930624485016, + "max": 0.7191212773323059, + "mean": 0.5806662440299988, + "std": 0.03885548189282417, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_q.weight": { + "min": -0.239033043384552, + "max": 0.19648200273513794, + "mean": 2.5991641450673342e-05, + "std": 0.03746527060866356, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_q.bias": { + "min": -0.11883819848299026, + "max": 0.1667412370443344, + "mean": 0.0009821474086493254, + "std": 0.02755241096019745, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_k.weight": { + "min": -0.24662744998931885, + "max": 0.4999285340309143, + "mean": -5.0414026190992445e-05, + "std": 0.037622544914484024, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_k.bias": { + "min": -3.941795825958252, + "max": 3.768937587738037, + "mean": -0.0035722628235816956, + "std": 0.681327760219574, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_v.weight": { + "min": -0.22736430168151855, + "max": 0.25185492634773254, + "mean": -1.1772199286497198e-05, + "std": 0.037433888763189316, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_v.bias": { + "min": -0.07156982272863388, + "max": 0.08060310035943985, + "mean": -0.0005125089664943516, + "std": 0.01565583609044552, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.weight": { + "min": -0.22800227999687195, + "max": 0.25769373774528503, + "mean": -2.863763802452013e-05, + "std": 0.035420775413513184, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.bias": { + "min": -0.20050473511219025, + "max": 0.2148960828781128, + "mean": -0.005524474661797285, + "std": 0.06832842528820038, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.3.g": { + "min": 0.405087411403656, + "max": 1.1892733573913574, + "mean": 0.7378814816474915, + "std": 0.05523177236318588, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.weight": { + "min": -0.2209046483039856, + "max": 0.24561487138271332, + "mean": 0.000521098030731082, + "std": 0.041335128247737885, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.bias": { + "min": -0.1032090112566948, + "max": 0.02416798658668995, + "mean": -0.032665450125932693, + "std": 0.018891815096139908, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.12.4.ff.2.weight": { + "min": -0.4496724605560303, + "max": 0.4224262237548828, + "mean": -0.0004358820151537657, + "std": 0.04689519852399826, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.12.4.ff.2.bias": { + "min": -0.2515088617801666, + "max": 0.47011902928352356, + "mean": 0.003207466099411249, + "std": 0.044524550437927246, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.0.weight": { + "min": -0.3168761134147644, + "max": 0.3331414461135864, + "mean": -2.506819146219641e-05, + "std": 0.02128741703927517, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.13.1.g": { + "min": 0.3245299160480499, + "max": 0.6855776906013489, + "mean": 0.5709930658340454, + "std": 0.04470643773674965, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_q.weight": { + "min": -0.1645486205816269, + "max": 0.1745065301656723, + "mean": -4.8789879656396806e-05, + "std": 0.03318168222904205, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_q.bias": { + "min": -0.18692335486412048, + "max": 0.14329002797603607, + "mean": 3.758035018108785e-05, + "std": 0.029700448736548424, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_k.weight": { + "min": -0.3810470402240753, + "max": 0.24586895108222961, + "mean": -9.737135769682936e-06, + "std": 0.03276293724775314, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_k.bias": { + "min": -3.6554298400878906, + "max": 3.2897167205810547, + "mean": -0.014251163229346275, + "std": 0.9850608110427856, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_v.weight": { + "min": -0.23475222289562225, + "max": 0.2473384439945221, + "mean": -1.814275310607627e-05, + "std": 0.041697416454553604, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_v.bias": { + "min": -0.0725652277469635, + "max": 0.15448249876499176, + "mean": 0.0006658083875663579, + "std": 0.02517012506723404, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.weight": { + "min": -0.2663499712944031, + "max": 0.2480984330177307, + "mean": -1.5296925994334742e-05, + "std": 0.04013863205909729, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.bias": { + "min": -0.18960432708263397, + "max": 0.194618359208107, + "mean": -0.0012379353865981102, + "std": 0.06668508052825928, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.3.g": { + "min": 0.32916781306266785, + "max": 0.9996783137321472, + "mean": 0.7191422581672668, + "std": 0.0523388646543026, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.weight": { + "min": -0.23172040283679962, + "max": 0.2451343685388565, + "mean": 0.00018265256949234754, + "std": 0.04089942201972008, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.bias": { + "min": -0.11451541632413864, + "max": 0.01910208724439144, + "mean": -0.04247751086950302, + "std": 0.0188636165112257, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.13.4.ff.2.weight": { + "min": -0.38971978425979614, + "max": 0.40751317143440247, + "mean": -2.1620868210447952e-05, + "std": 0.04853251948952675, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.13.4.ff.2.bias": { + "min": -0.6930332779884338, + "max": 0.4125932455062866, + "mean": 0.0008482532575726509, + "std": 0.06028350815176964, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.0.weight": { + "min": -0.0015386008890345693, + "max": 1.0007996559143066, + "mean": 0.00048813552712090313, + "std": 0.022089246660470963, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.14.1.g": { + "min": 0.9992700219154358, + "max": 1.0015240907669067, + "mean": 1.0000568628311157, + "std": 0.000619773636572063, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_q.weight": { + "min": -0.031252991408109665, + "max": 0.031256891787052155, + "mean": -2.1020092390244827e-05, + "std": 0.01803199015557766, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_q.bias": { + "min": -0.03121490404009819, + "max": 0.03123173676431179, + "mean": -0.0006769870524294674, + "std": 0.01782653108239174, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_k.weight": { + "min": -0.0312560498714447, + "max": 0.03126147389411926, + "mean": -8.831357263261452e-06, + "std": 0.01803101785480976, + "sparsity": 9.5367431640625e-07, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_k.bias": { + "min": -0.031231652945280075, + "max": 0.031244346871972084, + "mean": -0.0007297407719306648, + "std": 0.01794145628809929, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.bias": { + "min": -0.0005350728752091527, + "max": 0.0004281355068087578, + "mean": -3.930799721274525e-06, + "std": 0.00015574153803754598, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.3.g": { + "min": 0.9972792267799377, + "max": 1.0023835897445679, + "mean": 0.9995018243789673, + "std": 0.0008350047282874584, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.weight": { + "min": -0.03338531777262688, + "max": 0.03282884135842323, + "mean": -2.971738467749674e-06, + "std": 0.018026772886514664, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.bias": { + "min": -0.03250397369265556, + "max": 0.031224608421325684, + "mean": -0.0005561817670240998, + "std": 0.01803283393383026, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.14.4.ff.2.weight": { + "min": -0.001761053572408855, + "max": 0.0016201753169298172, + "mean": -9.977067065847223e-07, + "std": 0.00029509843443520367, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.14.4.ff.2.bias": { + "min": -0.0005179685540497303, + "max": 0.00046010586083866656, + "mean": -3.1889690035313834e-06, + "std": 0.00014008936705067754, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.0.weight": { + "min": -0.23426799476146698, + "max": 0.2724316120147705, + "mean": 6.618206498387735e-06, + "std": 0.01881008967757225, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.15.1.g": { + "min": 0.32140958309173584, + "max": 0.6938180923461914, + "mean": 0.58160400390625, + "std": 0.045936692506074905, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_q.weight": { + "min": -0.18184486031532288, + "max": 0.19783763587474823, + "mean": -1.1537180398590863e-05, + "std": 0.03318366780877113, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_q.bias": { + "min": -0.16044476628303528, + "max": 0.12933249771595, + "mean": -0.001071967650204897, + "std": 0.03413407504558563, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_k.weight": { + "min": -0.33228737115859985, + "max": 0.31113728880882263, + "mean": -1.0175894203712232e-05, + "std": 0.03223416581749916, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_k.bias": { + "min": -7.80244779586792, + "max": 8.761518478393555, + "mean": 0.093451589345932, + "std": 1.619434118270874, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_v.weight": { + "min": -0.23388099670410156, + "max": 0.2418091893196106, + "mean": 4.1715411498444155e-05, + "std": 0.04085543006658554, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_v.bias": { + "min": -0.07592413574457169, + "max": 0.06573085486888885, + "mean": 0.00048532572691328824, + "std": 0.019415952265262604, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.weight": { + "min": -0.2459113746881485, + "max": 0.23399382829666138, + "mean": -3.2584175642114133e-06, + "std": 0.039430178701877594, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.bias": { + "min": -0.1629519760608673, + "max": 0.16087952256202698, + "mean": 0.0016248535830527544, + "std": 0.06528551876544952, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.3.g": { + "min": 0.5571001172065735, + "max": 0.9435561299324036, + "mean": 0.712803840637207, + "std": 0.040119532495737076, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.weight": { + "min": -0.2279409021139145, + "max": 0.25474709272384644, + "mean": -4.549993900582194e-05, + "std": 0.040573619306087494, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.bias": { + "min": -0.13481706380844116, + "max": 0.02219359762966633, + "mean": -0.041350673884153366, + "std": 0.018385522067546844, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.15.4.ff.2.weight": { + "min": -0.42158395051956177, + "max": 0.3924521505832672, + "mean": -4.16895818489138e-06, + "std": 0.047782838344573975, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.15.4.ff.2.bias": { + "min": -0.607164204120636, + "max": 0.6512984037399292, + "mean": 0.0015855339588597417, + "std": 0.056834105402231216, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.0.weight": { + "min": -0.25181475281715393, + "max": 0.32078737020492554, + "mean": -6.139540346339345e-06, + "std": 0.019613103941082954, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.16.1.g": { + "min": 0.3595266342163086, + "max": 0.6821960806846619, + "mean": 0.5706722140312195, + "std": 0.042985353618860245, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_q.weight": { + "min": -0.2202295958995819, + "max": 0.177076518535614, + "mean": -3.443878813413903e-05, + "std": 0.03429801017045975, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_q.bias": { + "min": -0.16317804157733917, + "max": 0.23287786543369293, + "mean": 0.00035837513860315084, + "std": 0.03280922770500183, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_k.weight": { + "min": -0.2639525532722473, + "max": 0.23980671167373657, + "mean": -5.297175084706396e-05, + "std": 0.03389657661318779, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_k.bias": { + "min": -4.854193210601807, + "max": 5.090420722961426, + "mean": 0.043878111988306046, + "std": 1.2290726900100708, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_v.weight": { + "min": -0.24640515446662903, + "max": 0.250241219997406, + "mean": 7.21166143193841e-05, + "std": 0.043985553085803986, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_v.bias": { + "min": -0.06247914582490921, + "max": 0.054487086832523346, + "mean": 0.0006464287871494889, + "std": 0.017190182581543922, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.weight": { + "min": -0.2863953709602356, + "max": 0.27215418219566345, + "mean": -5.014354974264279e-05, + "std": 0.0429837629199028, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.bias": { + "min": -0.16105736792087555, + "max": 0.17032958567142487, + "mean": -0.0028887835796922445, + "std": 0.05930224433541298, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.3.g": { + "min": 0.5198467373847961, + "max": 0.9329147338867188, + "mean": 0.7133820652961731, + "std": 0.03842068091034889, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.weight": { + "min": -0.23785468935966492, + "max": 0.2487422525882721, + "mean": 0.00046461093006655574, + "std": 0.04045235738158226, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.bias": { + "min": -0.14500072598457336, + "max": 0.04102769121527672, + "mean": -0.039694253355264664, + "std": 0.020542506128549576, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.16.4.ff.2.weight": { + "min": -0.532442569732666, + "max": 0.5823614597320557, + "mean": 6.013309757690877e-06, + "std": 0.04885788634419441, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.16.4.ff.2.bias": { + "min": -0.5189021229743958, + "max": 0.4934021234512329, + "mean": 0.0023652694653719664, + "std": 0.05344180017709732, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.0.weight": { + "min": -0.2737113833427429, + "max": 0.3155929148197174, + "mean": 1.988332769542467e-06, + "std": 0.020049693062901497, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.17.1.g": { + "min": 0.3658766746520996, + "max": 0.7116788029670715, + "mean": 0.5931248664855957, + "std": 0.04595986381173134, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_q.weight": { + "min": -0.21088893711566925, + "max": 0.19901061058044434, + "mean": 3.061449388042092e-05, + "std": 0.0348670557141304, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_q.bias": { + "min": -0.18707768619060516, + "max": 0.20344795286655426, + "mean": 0.0009536991128697991, + "std": 0.03149910271167755, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_k.weight": { + "min": -0.2897132933139801, + "max": 0.3398728668689728, + "mean": -4.695481766248122e-05, + "std": 0.034587565809488297, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_k.bias": { + "min": -3.8768022060394287, + "max": 3.386897563934326, + "mean": 0.014455738477408886, + "std": 0.8582935929298401, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_v.weight": { + "min": -0.22446562349796295, + "max": 0.24974551796913147, + "mean": -3.865096914523747e-06, + "std": 0.042228855192661285, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_v.bias": { + "min": -0.055283673107624054, + "max": 0.046579472720623016, + "mean": -2.0229621441103518e-05, + "std": 0.015845011919736862, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.weight": { + "min": -0.2932588756084442, + "max": 0.29019662737846375, + "mean": -7.67192614148371e-06, + "std": 0.04194393754005432, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.bias": { + "min": -0.12487518787384033, + "max": 0.2589555084705353, + "mean": -0.0032450095750391483, + "std": 0.053175244480371475, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.3.g": { + "min": 0.45627039670944214, + "max": 0.8444806933403015, + "mean": 0.7054478526115417, + "std": 0.03522774204611778, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.weight": { + "min": -0.512130856513977, + "max": 0.34817978739738464, + "mean": 0.00034297071397304535, + "std": 0.040197573602199554, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.bias": { + "min": -0.18561507761478424, + "max": 0.039553456008434296, + "mean": -0.039388205856084824, + "std": 0.02135956473648548, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.17.4.ff.2.weight": { + "min": -0.5439714193344116, + "max": 0.5556594729423523, + "mean": -7.099103095242754e-05, + "std": 0.050732966512441635, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.17.4.ff.2.bias": { + "min": -0.5116639137268066, + "max": 0.6642246842384338, + "mean": 0.002442360855638981, + "std": 0.04952433332800865, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.0.weight": { + "min": -0.33249908685684204, + "max": 0.2653781771659851, + "mean": 3.2569464565312956e-06, + "std": 0.019386788830161095, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.18.1.g": { + "min": 0.3219698965549469, + "max": 0.766376256942749, + "mean": 0.651033878326416, + "std": 0.04532676190137863, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_q.weight": { + "min": -0.2498074471950531, + "max": 0.21987499296665192, + "mean": -1.9507724573486485e-06, + "std": 0.036501552909612656, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_q.bias": { + "min": -0.3268783390522003, + "max": 0.2866748869419098, + "mean": -0.0006870508659631014, + "std": 0.03855406492948532, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_k.weight": { + "min": -0.3101723790168762, + "max": 0.37016358971595764, + "mean": 6.504941848106682e-05, + "std": 0.03624220937490463, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_k.bias": { + "min": -4.7166595458984375, + "max": 5.806900978088379, + "mean": 0.03795350342988968, + "std": 1.4129759073257446, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_v.weight": { + "min": -0.22155693173408508, + "max": 0.2057628631591797, + "mean": -7.524936518166214e-05, + "std": 0.042484089732170105, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_v.bias": { + "min": -0.07764487713575363, + "max": 0.051462698727846146, + "mean": -0.000925063737668097, + "std": 0.0164109468460083, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.weight": { + "min": -0.33050650358200073, + "max": 0.329324871301651, + "mean": -4.5611386667587794e-06, + "std": 0.042790405452251434, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.bias": { + "min": -0.2847575545310974, + "max": 0.11197607964277267, + "mean": -0.0012040773872286081, + "std": 0.04701252654194832, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.3.g": { + "min": 0.48601120710372925, + "max": 0.8868346214294434, + "mean": 0.7373513579368591, + "std": 0.038241803646087646, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.weight": { + "min": -0.3624517619609833, + "max": 0.27458682656288147, + "mean": 5.118873013998382e-05, + "std": 0.040643129497766495, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.bias": { + "min": -0.24757687747478485, + "max": 0.046393755823373795, + "mean": -0.039262838661670685, + "std": 0.023290209472179413, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.18.4.ff.2.weight": { + "min": -0.626139223575592, + "max": 0.5965114235877991, + "mean": -6.056673373677768e-05, + "std": 0.0531148836016655, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.18.4.ff.2.bias": { + "min": -0.7093748450279236, + "max": 0.2657814621925354, + "mean": 0.0009187416289933026, + "std": 0.05122179910540581, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.0.weight": { + "min": -0.3433896005153656, + "max": 0.3037145733833313, + "mean": 3.0547948881576303e-07, + "std": 0.019135164096951485, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.19.1.g": { + "min": 0.34973248839378357, + "max": 0.7829060554504395, + "mean": 0.6387954354286194, + "std": 0.049250222742557526, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_q.weight": { + "min": -0.20535039901733398, + "max": 0.20685911178588867, + "mean": -5.973261431790888e-05, + "std": 0.03769532963633537, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_q.bias": { + "min": -0.25850412249565125, + "max": 0.2679128050804138, + "mean": -0.00040441699093207717, + "std": 0.044591374695301056, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_k.weight": { + "min": -0.354056179523468, + "max": 0.3223519027233124, + "mean": -6.86804014549125e-06, + "std": 0.03720388934016228, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_k.bias": { + "min": -5.260861873626709, + "max": 4.203889846801758, + "mean": -0.02641155757009983, + "std": 1.0066218376159668, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_v.weight": { + "min": -0.23860004544258118, + "max": 0.24336647987365723, + "mean": -2.503740142856259e-05, + "std": 0.043208908289670944, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_v.bias": { + "min": -0.06237001344561577, + "max": 0.05677289888262749, + "mean": 0.0003429377684369683, + "std": 0.014151404611766338, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.weight": { + "min": -0.43683916330337524, + "max": 0.37347522377967834, + "mean": 1.453105596738169e-05, + "std": 0.04412021487951279, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.bias": { + "min": -0.096480593085289, + "max": 0.17590999603271484, + "mean": -0.0006604294758290052, + "std": 0.03515587002038956, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.3.g": { + "min": 0.4216778874397278, + "max": 1.0693583488464355, + "mean": 0.7482997179031372, + "std": 0.04205985367298126, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.weight": { + "min": -0.2665577530860901, + "max": 0.2968434989452362, + "mean": -7.962346717249602e-05, + "std": 0.040803126990795135, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.bias": { + "min": -0.18574897944927216, + "max": 0.04386778548359871, + "mean": -0.036819178611040115, + "std": 0.02561137080192566, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.19.4.ff.2.weight": { + "min": -0.45699048042297363, + "max": 0.4864794611930847, + "mean": 4.341273597674444e-05, + "std": 0.05420761927962303, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.19.4.ff.2.bias": { + "min": -0.28645777702331543, + "max": 0.5512458086013794, + "mean": -0.0008799894712865353, + "std": 0.04782594367861748, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.0.weight": { + "min": -0.29278504848480225, + "max": 0.32276028394699097, + "mean": 6.534221029141918e-06, + "std": 0.019969386979937553, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.20.1.g": { + "min": 0.29091978073120117, + "max": 0.760124921798706, + "mean": 0.6508240699768066, + "std": 0.05213485658168793, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_q.weight": { + "min": -0.24355527758598328, + "max": 0.2617471516132355, + "mean": -6.045864211046137e-06, + "std": 0.03961271047592163, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_q.bias": { + "min": -0.2675487995147705, + "max": 0.19986717402935028, + "mean": -0.0008803302189335227, + "std": 0.051758527755737305, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_k.weight": { + "min": -0.2720382511615753, + "max": 0.25365304946899414, + "mean": 3.97135409002658e-06, + "std": 0.03870992362499237, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_k.bias": { + "min": -12.963478088378906, + "max": 15.945467948913574, + "mean": 0.03322439640760422, + "std": 1.988944411277771, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_v.weight": { + "min": -0.20726989209651947, + "max": 0.2258823961019516, + "mean": -7.221873966045678e-05, + "std": 0.04055318236351013, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_v.bias": { + "min": -0.06934336572885513, + "max": 0.06329023838043213, + "mean": 0.00015188338875304908, + "std": 0.014744000509381294, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.weight": { + "min": -0.46502813696861267, + "max": 0.3207668662071228, + "mean": 1.9557133782655e-05, + "std": 0.04058815911412239, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.bias": { + "min": -0.06403840333223343, + "max": 0.11518330872058868, + "mean": 0.001191072165966034, + "std": 0.02470429427921772, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.3.g": { + "min": 0.3746289610862732, + "max": 0.9322671294212341, + "mean": 0.7508296370506287, + "std": 0.040182456374168396, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.weight": { + "min": -0.2793700397014618, + "max": 0.27312716841697693, + "mean": -0.00016854800924193114, + "std": 0.040993720293045044, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.bias": { + "min": -0.19878964126110077, + "max": 0.050874363631010056, + "mean": -0.03202495723962784, + "std": 0.02511216513812542, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.20.4.ff.2.weight": { + "min": -0.6572921276092529, + "max": 0.5353701114654541, + "mean": -4.860567787545733e-05, + "std": 0.052844274789094925, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.20.4.ff.2.bias": { + "min": -0.19308353960514069, + "max": 0.5820099115371704, + "mean": -0.0005148603231646121, + "std": 0.04106666147708893, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.0.weight": { + "min": -0.41772764921188354, + "max": 0.3719545602798462, + "mean": 6.02346335654147e-06, + "std": 0.021620826795697212, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.21.1.g": { + "min": 0.21424666047096252, + "max": 0.7470943331718445, + "mean": 0.6495506763458252, + "std": 0.05437405779957771, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_q.weight": { + "min": -0.2095523476600647, + "max": 0.19568544626235962, + "mean": 4.010393604403362e-05, + "std": 0.03946491330862045, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_q.bias": { + "min": -0.32928818464279175, + "max": 0.2594093382358551, + "mean": -0.0032241325825452805, + "std": 0.05625630542635918, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_k.weight": { + "min": -0.2056845873594284, + "max": 0.254710853099823, + "mean": 5.4258445743471384e-05, + "std": 0.038567040115594864, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_k.bias": { + "min": -6.242719650268555, + "max": 6.931571006774902, + "mean": 0.04833323508501053, + "std": 1.384921908378601, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_v.weight": { + "min": -0.20961354672908783, + "max": 0.2300715446472168, + "mean": -5.3330231821746565e-06, + "std": 0.04131212830543518, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_v.bias": { + "min": -0.04391402378678322, + "max": 0.03599291667342186, + "mean": 3.6780984373763204e-06, + "std": 0.012800832279026508, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.weight": { + "min": -0.39794921875, + "max": 0.34475040435791016, + "mean": -5.557174881687388e-05, + "std": 0.0423884317278862, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.bias": { + "min": -0.055058449506759644, + "max": 0.06288675218820572, + "mean": 0.0003690638695843518, + "std": 0.018671618774533272, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.3.g": { + "min": 0.3500124216079712, + "max": 1.0451101064682007, + "mean": 0.789310097694397, + "std": 0.048743680119514465, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.weight": { + "min": -0.33340734243392944, + "max": 0.3858667314052582, + "mean": -0.00016963679809123278, + "std": 0.04147941246628761, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.bias": { + "min": -0.15730711817741394, + "max": 0.05913476645946503, + "mean": -0.031834498047828674, + "std": 0.025142161175608635, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.21.4.ff.2.weight": { + "min": -0.6963925361633301, + "max": 0.46865832805633545, + "mean": -9.133096318691969e-05, + "std": 0.05179010331630707, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.21.4.ff.2.bias": { + "min": -0.248288094997406, + "max": 0.3285192847251892, + "mean": -0.0002480646944604814, + "std": 0.04143183305859566, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.0.weight": { + "min": -0.2872416079044342, + "max": 0.35022279620170593, + "mean": -2.109378556269803e-06, + "std": 0.024238325655460358, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.22.1.g": { + "min": 0.19658105075359344, + "max": 0.7791422605514526, + "mean": 0.6702942848205566, + "std": 0.0586935319006443, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_q.weight": { + "min": -0.22860872745513916, + "max": 0.2311849147081375, + "mean": -1.9817682186840102e-05, + "std": 0.04044090211391449, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_q.bias": { + "min": -0.21965830028057098, + "max": 0.2406904250383377, + "mean": 0.0007772702374495566, + "std": 0.05579812079668045, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_k.weight": { + "min": -0.21554625034332275, + "max": 0.2266112118959427, + "mean": -7.155907223932445e-05, + "std": 0.03937710076570511, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_k.bias": { + "min": -8.904163360595703, + "max": 9.067035675048828, + "mean": -0.001250317320227623, + "std": 1.848069429397583, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_v.weight": { + "min": -0.26928046345710754, + "max": 0.2589084208011627, + "mean": 4.358497244538739e-05, + "std": 0.03840699419379234, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_v.bias": { + "min": -0.05760843679308891, + "max": 0.057633914053440094, + "mean": 0.0003498811274766922, + "std": 0.014721624553203583, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.weight": { + "min": -0.265085905790329, + "max": 0.2886793613433838, + "mean": -6.175917224027216e-05, + "std": 0.03907330706715584, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.bias": { + "min": -0.043753523379564285, + "max": 0.03726416453719139, + "mean": -8.701729530002922e-05, + "std": 0.013365592807531357, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.3.g": { + "min": 0.3394947946071625, + "max": 1.092633843421936, + "mean": 0.8636797666549683, + "std": 0.06384899467229843, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.weight": { + "min": -0.42328590154647827, + "max": 0.4191039204597473, + "mean": 0.0003126378287561238, + "std": 0.043501876294612885, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.bias": { + "min": -0.2147369235754013, + "max": 0.17059248685836792, + "mean": -0.029485618695616722, + "std": 0.03195330873131752, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.22.4.ff.2.weight": { + "min": -0.5996397733688354, + "max": 0.5595637559890747, + "mean": -0.00015250420256052166, + "std": 0.05344444885849953, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.22.4.ff.2.bias": { + "min": -0.17845340073108673, + "max": 0.37662389874458313, + "mean": 0.0013645882718265057, + "std": 0.037309858947992325, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.0.weight": { + "min": -0.3942789137363434, + "max": 0.36899739503860474, + "mean": 3.645062679424882e-05, + "std": 0.028621336445212364, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.23.1.g": { + "min": 0.2902868390083313, + "max": 0.8265326619148254, + "mean": 0.7055679559707642, + "std": 0.0678958147764206, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_q.weight": { + "min": -0.926041305065155, + "max": 1.026432991027832, + "mean": -2.5475666916463524e-05, + "std": 0.0476241335272789, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_q.bias": { + "min": -0.87814861536026, + "max": 0.8150070905685425, + "mean": -0.00031320619746111333, + "std": 0.09553563594818115, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_k.weight": { + "min": -0.2693868577480316, + "max": 0.24089287221431732, + "mean": -2.29374309128616e-05, + "std": 0.03895637392997742, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_k.bias": { + "min": -23.73939323425293, + "max": 22.84785270690918, + "mean": -0.0918712168931961, + "std": 4.0697784423828125, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_v.weight": { + "min": -0.22775250673294067, + "max": 0.24510256946086884, + "mean": -2.5825131160672754e-05, + "std": 0.03863884136080742, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_v.bias": { + "min": -0.06045493483543396, + "max": 0.04607832431793213, + "mean": -0.00014694462879560888, + "std": 0.01469829585403204, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.weight": { + "min": -0.33846479654312134, + "max": 0.37447792291641235, + "mean": 7.293592716450803e-06, + "std": 0.04081470146775246, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.bias": { + "min": -0.04649795591831207, + "max": 0.19573213160037994, + "mean": 0.00027208085521124303, + "std": 0.013573010452091694, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.3.g": { + "min": 0.37458330392837524, + "max": 1.1300410032272339, + "mean": 0.8900002241134644, + "std": 0.06398438662290573, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.weight": { + "min": -0.4478272497653961, + "max": 0.5424814224243164, + "mean": 2.45622759393882e-05, + "std": 0.045566376298666, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.bias": { + "min": -0.22404029965400696, + "max": 0.08835332095623016, + "mean": -0.032017190009355545, + "std": 0.03776315227150917, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.23.4.ff.2.weight": { + "min": -0.7251995801925659, + "max": 0.6892821788787842, + "mean": 3.438512794673443e-05, + "std": 0.05177679285407066, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.23.4.ff.2.bias": { + "min": -0.1745474934577942, + "max": 0.2185421884059906, + "mean": 4.038875340484083e-05, + "std": 0.03178102895617485, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.0.weight": { + "min": -0.3403666019439697, + "max": 0.3743104040622711, + "mean": 4.2970114009222016e-05, + "std": 0.03414527699351311, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.24.1.g": { + "min": 0.31756705045700073, + "max": 1.2868698835372925, + "mean": 0.6014533042907715, + "std": 0.08345934003591537, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_q.weight": { + "min": -0.28337857127189636, + "max": 0.26026472449302673, + "mean": -3.1064557788340608e-06, + "std": 0.03598480299115181, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_q.bias": { + "min": -0.23555569350719452, + "max": 0.2053573727607727, + "mean": 0.0002324726083315909, + "std": 0.05600997060537338, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_k.weight": { + "min": -0.4354943335056305, + "max": 0.3252315819263458, + "mean": 2.4552073227823712e-05, + "std": 0.03413620963692665, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_k.bias": { + "min": -5.544710159301758, + "max": 7.31260871887207, + "mean": -0.007366638630628586, + "std": 0.6992178559303284, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_v.weight": { + "min": -0.34383082389831543, + "max": 0.3635445833206177, + "mean": 0.00010339185246266425, + "std": 0.04782695323228836, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_v.bias": { + "min": -0.07375096529722214, + "max": 0.06034737080335617, + "mean": 0.000933139817789197, + "std": 0.014950517565011978, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.weight": { + "min": -0.2554619610309601, + "max": 0.28651097416877747, + "mean": 4.460267518879846e-06, + "std": 0.04155408963561058, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.bias": { + "min": -0.055337581783533096, + "max": 0.06284268200397491, + "mean": 0.00014179576828610152, + "std": 0.007177725899964571, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.3.g": { + "min": 0.4937240481376648, + "max": 1.2209070920944214, + "mean": 1.01340913772583, + "std": 0.11743401736021042, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.weight": { + "min": -1.0935479402542114, + "max": 1.0468977689743042, + "mean": -4.9845290050143376e-05, + "std": 0.05240994319319725, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.bias": { + "min": -0.22365206480026245, + "max": 0.17271095514297485, + "mean": -0.027249177917838097, + "std": 0.03635435923933983, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.24.4.ff.2.weight": { + "min": -0.8846310973167419, + "max": 0.9225372672080994, + "mean": -0.00014597778499592096, + "std": 0.053280774503946304, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.24.4.ff.2.bias": { + "min": -0.17102883756160736, + "max": 0.3799268901348114, + "mean": 0.0033686391543596983, + "std": 0.039900682866573334, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.0.weight": { + "min": -0.7772161960601807, + "max": 0.7236161828041077, + "mean": 1.9240971596445888e-05, + "std": 0.04616595432162285, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.25.1.g": { + "min": 0.33854806423187256, + "max": 1.4277222156524658, + "mean": 0.9483012557029724, + "std": 0.20673148334026337, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_q.weight": { + "min": -1.7455986738204956, + "max": 1.7045377492904663, + "mean": 0.00022702554997522384, + "std": 0.15868352353572845, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_q.bias": { + "min": -1.199636459350586, + "max": 1.0996308326721191, + "mean": -0.009536425583064556, + "std": 0.20382796227931976, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_k.weight": { + "min": -0.4213047921657562, + "max": 0.4262976348400116, + "mean": 6.459288124460727e-05, + "std": 0.04801792651414871, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_k.bias": { + "min": -19.743492126464844, + "max": 19.538597106933594, + "mean": -0.24829509854316711, + "std": 4.776083946228027, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_v.weight": { + "min": -0.3239092528820038, + "max": 0.43836328387260437, + "mean": -1.204050931846723e-05, + "std": 0.046160612255334854, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_v.bias": { + "min": -0.0340789370238781, + "max": 0.03713114559650421, + "mean": 0.0006417044205591083, + "std": 0.012921737506985664, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.weight": { + "min": -0.7034957408905029, + "max": 0.664257287979126, + "mean": 4.352344694780186e-05, + "std": 0.05788278207182884, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.bias": { + "min": -0.07222186028957367, + "max": 0.06749024242162704, + "mean": -0.00013264152221381664, + "std": 0.012920759618282318, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.3.g": { + "min": 0.38012510538101196, + "max": 1.3909755945205688, + "mean": 1.0665355920791626, + "std": 0.21970459818840027, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.weight": { + "min": -0.6164048314094543, + "max": 0.7170195579528809, + "mean": 0.00011136491957586259, + "std": 0.05802035331726074, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.bias": { + "min": -0.21974356472492218, + "max": 0.22506725788116455, + "mean": 0.006242978852242231, + "std": 0.04973088204860687, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.25.4.ff.2.weight": { + "min": -0.6296619176864624, + "max": 0.8891851902008057, + "mean": 1.1489293683553115e-05, + "std": 0.023526353761553764, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.25.4.ff.2.bias": { + "min": -0.5068330764770508, + "max": 0.4739985764026642, + "mean": -0.0030159270390868187, + "std": 0.06930534541606903, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.norm_out.g": { + "min": 0.5377116799354553, + "max": 1.180783748626709, + "mean": 0.7827296257019043, + "std": 0.09886873513460159, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.proj_out.weight": { + "min": -0.2669491767883301, + "max": 0.21265925467014313, + "mean": -0.00022343886666931212, + "std": 0.05399514362215996, + "sparsity": 0.0, + "shape": [ + 100, + 1024 + ] + }, + "transformer.proj_out.bias": { + "min": -0.23786094784736633, + "max": 0.014840648509562016, + "mean": -0.04396260902285576, + "std": 0.034334905445575714, + "sparsity": 0.0, + "shape": [ + 100 + ] + } + } +} \ No newline at end of file