diff --git "a/model_analysis.json" "b/model_analysis.json" new file mode 100644--- /dev/null +++ "b/model_analysis.json" @@ -0,0 +1,4683 @@ +{ + "layer_types": { + "transformer": 391 + }, + "parameter_counts": { + "transformer.time_embed.time_mlp.0.weight": 262144, + "transformer.time_embed.time_mlp.0.bias": 1024, + "transformer.time_embed.time_mlp.2.weight": 1048576, + "transformer.time_embed.time_mlp.2.bias": 1024, + "transformer.text_embed.text_embed.weight": 254600, + "transformer.input_embed.proj.weight": 307200, + "transformer.input_embed.proj.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, + "transformer.layers.0.1.g": 1024, + "transformer.layers.0.2.to_q.weight": 1048576, + "transformer.layers.0.2.to_q.bias": 1024, + "transformer.layers.0.2.to_k.weight": 1048576, + "transformer.layers.0.2.to_k.bias": 1024, + "transformer.layers.0.2.to_v.weight": 1048576, + "transformer.layers.0.2.to_v.bias": 1024, + "transformer.layers.0.2.to_out.0.weight": 1048576, + "transformer.layers.0.2.to_out.0.bias": 1024, + "transformer.layers.0.3.g": 1024, + "transformer.layers.0.4.ff.0.0.weight": 4194304, + "transformer.layers.0.4.ff.0.0.bias": 4096, + "transformer.layers.0.4.ff.2.weight": 4194304, + "transformer.layers.0.4.ff.2.bias": 1024, + "transformer.layers.1.1.g": 1024, + "transformer.layers.1.2.to_q.weight": 1048576, + "transformer.layers.1.2.to_q.bias": 1024, + "transformer.layers.1.2.to_k.weight": 1048576, + "transformer.layers.1.2.to_k.bias": 1024, + "transformer.layers.1.2.to_v.weight": 1048576, + "transformer.layers.1.2.to_v.bias": 1024, + "transformer.layers.1.2.to_out.0.weight": 1048576, + "transformer.layers.1.2.to_out.0.bias": 1024, + "transformer.layers.1.3.g": 1024, + "transformer.layers.1.4.ff.0.0.weight": 4194304, + "transformer.layers.1.4.ff.0.0.bias": 4096, + "transformer.layers.1.4.ff.2.weight": 4194304, + "transformer.layers.1.4.ff.2.bias": 1024, + "transformer.layers.2.1.g": 1024, + "transformer.layers.2.2.to_q.weight": 1048576, + "transformer.layers.2.2.to_q.bias": 1024, + "transformer.layers.2.2.to_k.weight": 1048576, + "transformer.layers.2.2.to_k.bias": 1024, + "transformer.layers.2.2.to_v.weight": 1048576, + "transformer.layers.2.2.to_v.bias": 1024, + "transformer.layers.2.2.to_out.0.weight": 1048576, + "transformer.layers.2.2.to_out.0.bias": 1024, + "transformer.layers.2.3.g": 1024, + "transformer.layers.2.4.ff.0.0.weight": 4194304, + "transformer.layers.2.4.ff.0.0.bias": 4096, + "transformer.layers.2.4.ff.2.weight": 4194304, + "transformer.layers.2.4.ff.2.bias": 1024, + "transformer.layers.3.1.g": 1024, + "transformer.layers.3.2.to_q.weight": 1048576, + "transformer.layers.3.2.to_q.bias": 1024, + "transformer.layers.3.2.to_k.weight": 1048576, + "transformer.layers.3.2.to_k.bias": 1024, + "transformer.layers.3.2.to_v.weight": 1048576, + "transformer.layers.3.2.to_v.bias": 1024, + "transformer.layers.3.2.to_out.0.weight": 1048576, + "transformer.layers.3.2.to_out.0.bias": 1024, + "transformer.layers.3.3.g": 1024, + "transformer.layers.3.4.ff.0.0.weight": 4194304, + "transformer.layers.3.4.ff.0.0.bias": 4096, + "transformer.layers.3.4.ff.2.weight": 4194304, + "transformer.layers.3.4.ff.2.bias": 1024, + "transformer.layers.4.1.g": 1024, + "transformer.layers.4.2.to_q.weight": 1048576, + "transformer.layers.4.2.to_q.bias": 1024, + "transformer.layers.4.2.to_k.weight": 1048576, + "transformer.layers.4.2.to_k.bias": 1024, + "transformer.layers.4.2.to_v.weight": 1048576, + "transformer.layers.4.2.to_v.bias": 1024, + "transformer.layers.4.2.to_out.0.weight": 1048576, + "transformer.layers.4.2.to_out.0.bias": 1024, + "transformer.layers.4.3.g": 1024, + "transformer.layers.4.4.ff.0.0.weight": 4194304, + "transformer.layers.4.4.ff.0.0.bias": 4096, + "transformer.layers.4.4.ff.2.weight": 4194304, + "transformer.layers.4.4.ff.2.bias": 1024, + "transformer.layers.5.1.g": 1024, + "transformer.layers.5.2.to_q.weight": 1048576, + "transformer.layers.5.2.to_q.bias": 1024, + "transformer.layers.5.2.to_k.weight": 1048576, + "transformer.layers.5.2.to_k.bias": 1024, + "transformer.layers.5.2.to_v.weight": 1048576, + "transformer.layers.5.2.to_v.bias": 1024, + "transformer.layers.5.2.to_out.0.weight": 1048576, + "transformer.layers.5.2.to_out.0.bias": 1024, + "transformer.layers.5.3.g": 1024, + "transformer.layers.5.4.ff.0.0.weight": 4194304, + "transformer.layers.5.4.ff.0.0.bias": 4096, + "transformer.layers.5.4.ff.2.weight": 4194304, + "transformer.layers.5.4.ff.2.bias": 1024, + "transformer.layers.6.1.g": 1024, + "transformer.layers.6.2.to_q.weight": 1048576, + "transformer.layers.6.2.to_q.bias": 1024, + "transformer.layers.6.2.to_k.weight": 1048576, + "transformer.layers.6.2.to_k.bias": 1024, + "transformer.layers.6.2.to_v.weight": 1048576, + "transformer.layers.6.2.to_v.bias": 1024, + "transformer.layers.6.2.to_out.0.weight": 1048576, + "transformer.layers.6.2.to_out.0.bias": 1024, + "transformer.layers.6.3.g": 1024, + "transformer.layers.6.4.ff.0.0.weight": 4194304, + "transformer.layers.6.4.ff.0.0.bias": 4096, + "transformer.layers.6.4.ff.2.weight": 4194304, + "transformer.layers.6.4.ff.2.bias": 1024, + "transformer.layers.7.1.g": 1024, + "transformer.layers.7.2.to_q.weight": 1048576, + "transformer.layers.7.2.to_q.bias": 1024, + "transformer.layers.7.2.to_k.weight": 1048576, + "transformer.layers.7.2.to_k.bias": 1024, + "transformer.layers.7.2.to_v.weight": 1048576, + "transformer.layers.7.2.to_v.bias": 1024, + "transformer.layers.7.2.to_out.0.weight": 1048576, + "transformer.layers.7.2.to_out.0.bias": 1024, + "transformer.layers.7.3.g": 1024, + "transformer.layers.7.4.ff.0.0.weight": 4194304, + "transformer.layers.7.4.ff.0.0.bias": 4096, + "transformer.layers.7.4.ff.2.weight": 4194304, + "transformer.layers.7.4.ff.2.bias": 1024, + "transformer.layers.8.1.g": 1024, + "transformer.layers.8.2.to_q.weight": 1048576, + "transformer.layers.8.2.to_q.bias": 1024, + "transformer.layers.8.2.to_k.weight": 1048576, + "transformer.layers.8.2.to_k.bias": 1024, + "transformer.layers.8.2.to_v.weight": 1048576, + "transformer.layers.8.2.to_v.bias": 1024, + "transformer.layers.8.2.to_out.0.weight": 1048576, + "transformer.layers.8.2.to_out.0.bias": 1024, + "transformer.layers.8.3.g": 1024, + "transformer.layers.8.4.ff.0.0.weight": 4194304, + "transformer.layers.8.4.ff.0.0.bias": 4096, + "transformer.layers.8.4.ff.2.weight": 4194304, + "transformer.layers.8.4.ff.2.bias": 1024, + "transformer.layers.9.1.g": 1024, + "transformer.layers.9.2.to_q.weight": 1048576, + "transformer.layers.9.2.to_q.bias": 1024, + "transformer.layers.9.2.to_k.weight": 1048576, + "transformer.layers.9.2.to_k.bias": 1024, + "transformer.layers.9.2.to_v.weight": 1048576, + "transformer.layers.9.2.to_v.bias": 1024, + "transformer.layers.9.2.to_out.0.weight": 1048576, + "transformer.layers.9.2.to_out.0.bias": 1024, + "transformer.layers.9.3.g": 1024, + "transformer.layers.9.4.ff.0.0.weight": 4194304, + "transformer.layers.9.4.ff.0.0.bias": 4096, + "transformer.layers.9.4.ff.2.weight": 4194304, + "transformer.layers.9.4.ff.2.bias": 1024, + "transformer.layers.10.1.g": 1024, + "transformer.layers.10.2.to_q.weight": 1048576, + "transformer.layers.10.2.to_q.bias": 1024, + "transformer.layers.10.2.to_k.weight": 1048576, + "transformer.layers.10.2.to_k.bias": 1024, + "transformer.layers.10.2.to_v.weight": 1048576, + "transformer.layers.10.2.to_v.bias": 1024, + "transformer.layers.10.2.to_out.0.weight": 1048576, + "transformer.layers.10.2.to_out.0.bias": 1024, + "transformer.layers.10.3.g": 1024, + "transformer.layers.10.4.ff.0.0.weight": 4194304, + "transformer.layers.10.4.ff.0.0.bias": 4096, + "transformer.layers.10.4.ff.2.weight": 4194304, + "transformer.layers.10.4.ff.2.bias": 1024, + "transformer.layers.11.1.g": 1024, + "transformer.layers.11.2.to_q.weight": 1048576, + "transformer.layers.11.2.to_q.bias": 1024, + "transformer.layers.11.2.to_k.weight": 1048576, + "transformer.layers.11.2.to_k.bias": 1024, + "transformer.layers.11.2.to_v.weight": 1048576, + "transformer.layers.11.2.to_v.bias": 1024, + "transformer.layers.11.2.to_out.0.weight": 1048576, + "transformer.layers.11.2.to_out.0.bias": 1024, + "transformer.layers.11.3.g": 1024, + "transformer.layers.11.4.ff.0.0.weight": 4194304, + "transformer.layers.11.4.ff.0.0.bias": 4096, + "transformer.layers.11.4.ff.2.weight": 4194304, + "transformer.layers.11.4.ff.2.bias": 1024, + "transformer.layers.12.1.g": 1024, + "transformer.layers.12.2.to_q.weight": 1048576, + "transformer.layers.12.2.to_q.bias": 1024, + "transformer.layers.12.2.to_k.weight": 1048576, + "transformer.layers.12.2.to_k.bias": 1024, + "transformer.layers.12.2.to_v.weight": 1048576, + "transformer.layers.12.2.to_v.bias": 1024, + "transformer.layers.12.2.to_out.0.weight": 1048576, + "transformer.layers.12.2.to_out.0.bias": 1024, + "transformer.layers.12.3.g": 1024, + "transformer.layers.12.4.ff.0.0.weight": 4194304, + "transformer.layers.12.4.ff.0.0.bias": 4096, + "transformer.layers.12.4.ff.2.weight": 4194304, + "transformer.layers.12.4.ff.2.bias": 1024, + "transformer.layers.13.0.weight": 2097152, + "transformer.layers.13.1.g": 1024, + "transformer.layers.13.2.to_q.weight": 1048576, + "transformer.layers.13.2.to_q.bias": 1024, + "transformer.layers.13.2.to_k.weight": 1048576, + "transformer.layers.13.2.to_k.bias": 1024, + "transformer.layers.13.2.to_v.weight": 1048576, + "transformer.layers.13.2.to_v.bias": 1024, + "transformer.layers.13.2.to_out.0.weight": 1048576, + "transformer.layers.13.2.to_out.0.bias": 1024, + "transformer.layers.13.3.g": 1024, + "transformer.layers.13.4.ff.0.0.weight": 4194304, + "transformer.layers.13.4.ff.0.0.bias": 4096, + "transformer.layers.13.4.ff.2.weight": 4194304, + "transformer.layers.13.4.ff.2.bias": 1024, + "transformer.layers.14.0.weight": 2097152, + "transformer.layers.14.1.g": 1024, + "transformer.layers.14.2.to_q.weight": 1048576, + "transformer.layers.14.2.to_q.bias": 1024, + "transformer.layers.14.2.to_k.weight": 1048576, + "transformer.layers.14.2.to_k.bias": 1024, + "transformer.layers.14.2.to_v.weight": 1048576, + "transformer.layers.14.2.to_v.bias": 1024, + "transformer.layers.14.2.to_out.0.weight": 1048576, + "transformer.layers.14.2.to_out.0.bias": 1024, + "transformer.layers.14.3.g": 1024, + "transformer.layers.14.4.ff.0.0.weight": 4194304, + "transformer.layers.14.4.ff.0.0.bias": 4096, + "transformer.layers.14.4.ff.2.weight": 4194304, + "transformer.layers.14.4.ff.2.bias": 1024, + "transformer.layers.15.0.weight": 2097152, + "transformer.layers.15.1.g": 1024, + "transformer.layers.15.2.to_q.weight": 1048576, + "transformer.layers.15.2.to_q.bias": 1024, + "transformer.layers.15.2.to_k.weight": 1048576, + "transformer.layers.15.2.to_k.bias": 1024, + "transformer.layers.15.2.to_v.weight": 1048576, + "transformer.layers.15.2.to_v.bias": 1024, + "transformer.layers.15.2.to_out.0.weight": 1048576, + "transformer.layers.15.2.to_out.0.bias": 1024, + "transformer.layers.15.3.g": 1024, + "transformer.layers.15.4.ff.0.0.weight": 4194304, + "transformer.layers.15.4.ff.0.0.bias": 4096, + "transformer.layers.15.4.ff.2.weight": 4194304, + "transformer.layers.15.4.ff.2.bias": 1024, + "transformer.layers.16.0.weight": 2097152, + "transformer.layers.16.1.g": 1024, + "transformer.layers.16.2.to_q.weight": 1048576, + "transformer.layers.16.2.to_q.bias": 1024, + "transformer.layers.16.2.to_k.weight": 1048576, + "transformer.layers.16.2.to_k.bias": 1024, + "transformer.layers.16.2.to_v.weight": 1048576, + "transformer.layers.16.2.to_v.bias": 1024, + "transformer.layers.16.2.to_out.0.weight": 1048576, + "transformer.layers.16.2.to_out.0.bias": 1024, + "transformer.layers.16.3.g": 1024, + "transformer.layers.16.4.ff.0.0.weight": 4194304, + "transformer.layers.16.4.ff.0.0.bias": 4096, + "transformer.layers.16.4.ff.2.weight": 4194304, + "transformer.layers.16.4.ff.2.bias": 1024, + "transformer.layers.17.0.weight": 2097152, + "transformer.layers.17.1.g": 1024, + "transformer.layers.17.2.to_q.weight": 1048576, + "transformer.layers.17.2.to_q.bias": 1024, + "transformer.layers.17.2.to_k.weight": 1048576, + "transformer.layers.17.2.to_k.bias": 1024, + "transformer.layers.17.2.to_v.weight": 1048576, + "transformer.layers.17.2.to_v.bias": 1024, + "transformer.layers.17.2.to_out.0.weight": 1048576, + "transformer.layers.17.2.to_out.0.bias": 1024, + "transformer.layers.17.3.g": 1024, + "transformer.layers.17.4.ff.0.0.weight": 4194304, + "transformer.layers.17.4.ff.0.0.bias": 4096, + "transformer.layers.17.4.ff.2.weight": 4194304, + "transformer.layers.17.4.ff.2.bias": 1024, + "transformer.layers.18.0.weight": 2097152, + "transformer.layers.18.1.g": 1024, + "transformer.layers.18.2.to_q.weight": 1048576, + "transformer.layers.18.2.to_q.bias": 1024, + "transformer.layers.18.2.to_k.weight": 1048576, + "transformer.layers.18.2.to_k.bias": 1024, + "transformer.layers.18.2.to_v.weight": 1048576, + "transformer.layers.18.2.to_v.bias": 1024, + "transformer.layers.18.2.to_out.0.weight": 1048576, + "transformer.layers.18.2.to_out.0.bias": 1024, + "transformer.layers.18.3.g": 1024, + "transformer.layers.18.4.ff.0.0.weight": 4194304, + "transformer.layers.18.4.ff.0.0.bias": 4096, + "transformer.layers.18.4.ff.2.weight": 4194304, + "transformer.layers.18.4.ff.2.bias": 1024, + "transformer.layers.19.0.weight": 2097152, + "transformer.layers.19.1.g": 1024, + "transformer.layers.19.2.to_q.weight": 1048576, + "transformer.layers.19.2.to_q.bias": 1024, + "transformer.layers.19.2.to_k.weight": 1048576, + "transformer.layers.19.2.to_k.bias": 1024, + "transformer.layers.19.2.to_v.weight": 1048576, + "transformer.layers.19.2.to_v.bias": 1024, + "transformer.layers.19.2.to_out.0.weight": 1048576, + "transformer.layers.19.2.to_out.0.bias": 1024, + "transformer.layers.19.3.g": 1024, + "transformer.layers.19.4.ff.0.0.weight": 4194304, + "transformer.layers.19.4.ff.0.0.bias": 4096, + "transformer.layers.19.4.ff.2.weight": 4194304, + "transformer.layers.19.4.ff.2.bias": 1024, + "transformer.layers.20.0.weight": 2097152, + "transformer.layers.20.1.g": 1024, + "transformer.layers.20.2.to_q.weight": 1048576, + "transformer.layers.20.2.to_q.bias": 1024, + "transformer.layers.20.2.to_k.weight": 1048576, + "transformer.layers.20.2.to_k.bias": 1024, + "transformer.layers.20.2.to_v.weight": 1048576, + "transformer.layers.20.2.to_v.bias": 1024, + "transformer.layers.20.2.to_out.0.weight": 1048576, + "transformer.layers.20.2.to_out.0.bias": 1024, + "transformer.layers.20.3.g": 1024, + "transformer.layers.20.4.ff.0.0.weight": 4194304, + "transformer.layers.20.4.ff.0.0.bias": 4096, + "transformer.layers.20.4.ff.2.weight": 4194304, + "transformer.layers.20.4.ff.2.bias": 1024, + "transformer.layers.21.0.weight": 2097152, + "transformer.layers.21.1.g": 1024, + "transformer.layers.21.2.to_q.weight": 1048576, + "transformer.layers.21.2.to_q.bias": 1024, + "transformer.layers.21.2.to_k.weight": 1048576, + "transformer.layers.21.2.to_k.bias": 1024, + "transformer.layers.21.2.to_v.weight": 1048576, + "transformer.layers.21.2.to_v.bias": 1024, + "transformer.layers.21.2.to_out.0.weight": 1048576, + "transformer.layers.21.2.to_out.0.bias": 1024, + "transformer.layers.21.3.g": 1024, + "transformer.layers.21.4.ff.0.0.weight": 4194304, + "transformer.layers.21.4.ff.0.0.bias": 4096, + "transformer.layers.21.4.ff.2.weight": 4194304, + "transformer.layers.21.4.ff.2.bias": 1024, + "transformer.layers.22.0.weight": 2097152, + "transformer.layers.22.1.g": 1024, + "transformer.layers.22.2.to_q.weight": 1048576, + "transformer.layers.22.2.to_q.bias": 1024, + "transformer.layers.22.2.to_k.weight": 1048576, + "transformer.layers.22.2.to_k.bias": 1024, + "transformer.layers.22.2.to_v.weight": 1048576, + "transformer.layers.22.2.to_v.bias": 1024, + "transformer.layers.22.2.to_out.0.weight": 1048576, + "transformer.layers.22.2.to_out.0.bias": 1024, + "transformer.layers.22.3.g": 1024, + "transformer.layers.22.4.ff.0.0.weight": 4194304, + "transformer.layers.22.4.ff.0.0.bias": 4096, + "transformer.layers.22.4.ff.2.weight": 4194304, + "transformer.layers.22.4.ff.2.bias": 1024, + "transformer.layers.23.0.weight": 2097152, + "transformer.layers.23.1.g": 1024, + "transformer.layers.23.2.to_q.weight": 1048576, + "transformer.layers.23.2.to_q.bias": 1024, + "transformer.layers.23.2.to_k.weight": 1048576, + "transformer.layers.23.2.to_k.bias": 1024, + "transformer.layers.23.2.to_v.weight": 1048576, + "transformer.layers.23.2.to_v.bias": 1024, + "transformer.layers.23.2.to_out.0.weight": 1048576, + "transformer.layers.23.2.to_out.0.bias": 1024, + "transformer.layers.23.3.g": 1024, + "transformer.layers.23.4.ff.0.0.weight": 4194304, + "transformer.layers.23.4.ff.0.0.bias": 4096, + "transformer.layers.23.4.ff.2.weight": 4194304, + "transformer.layers.23.4.ff.2.bias": 1024, + "transformer.layers.24.0.weight": 2097152, + "transformer.layers.24.1.g": 1024, + "transformer.layers.24.2.to_q.weight": 1048576, + "transformer.layers.24.2.to_q.bias": 1024, + "transformer.layers.24.2.to_k.weight": 1048576, + "transformer.layers.24.2.to_k.bias": 1024, + "transformer.layers.24.2.to_v.weight": 1048576, + "transformer.layers.24.2.to_v.bias": 1024, + "transformer.layers.24.2.to_out.0.weight": 1048576, + "transformer.layers.24.2.to_out.0.bias": 1024, + "transformer.layers.24.3.g": 1024, + "transformer.layers.24.4.ff.0.0.weight": 4194304, + "transformer.layers.24.4.ff.0.0.bias": 4096, + "transformer.layers.24.4.ff.2.weight": 4194304, + "transformer.layers.24.4.ff.2.bias": 1024, + "transformer.layers.25.0.weight": 2097152, + "transformer.layers.25.1.g": 1024, + "transformer.layers.25.2.to_q.weight": 1048576, + "transformer.layers.25.2.to_q.bias": 1024, + "transformer.layers.25.2.to_k.weight": 1048576, + "transformer.layers.25.2.to_k.bias": 1024, + "transformer.layers.25.2.to_v.weight": 1048576, + "transformer.layers.25.2.to_v.bias": 1024, + "transformer.layers.25.2.to_out.0.weight": 1048576, + "transformer.layers.25.2.to_out.0.bias": 1024, + "transformer.layers.25.3.g": 1024, + "transformer.layers.25.4.ff.0.0.weight": 4194304, + "transformer.layers.25.4.ff.0.0.bias": 4096, + "transformer.layers.25.4.ff.2.weight": 4194304, + "transformer.layers.25.4.ff.2.bias": 1024, + "transformer.norm_out.g": 1024, + "transformer.proj_out.weight": 102400, + "transformer.proj_out.bias": 100 + }, + "important_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ], + "bottleneck_layers": [], + "recommendations": { + "focus_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ] + }, + "total_parameters": 391, + "total_elements": 360755948, + "param_ranges": { + "transformer.time_embed.time_mlp.0.weight": { + "min": -0.4304574429988861, + "max": 0.2989666759967804, + "mean": -0.0025583612732589245, + "std": 0.042551927268505096, + "sparsity": 0.0, + "shape": [ + 1024, + 256 + ] + }, + "transformer.time_embed.time_mlp.0.bias": { + "min": -0.06317814439535141, + "max": 0.10763632506132126, + "mean": 0.0005897035007365048, + "std": 0.03411067649722099, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.weight": { + "min": -0.4125778377056122, + "max": 0.8363006114959717, + "mean": -0.00021047875634394586, + "std": 0.024107400327920914, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.bias": { + "min": -0.1154782623052597, + "max": 0.32146546244621277, + "mean": -0.0009399052942171693, + "std": 0.019577190279960632, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.text_embed.text_embed.weight": { + "min": -2.7917730808258057, + "max": 2.8704917430877686, + "mean": -0.0003648003621492535, + "std": 0.6153737306594849, + "sparsity": 0.0, + "shape": [ + 2546, + 100 + ] + }, + "transformer.input_embed.proj.weight": { + "min": -0.27894294261932373, + "max": 0.38190174102783203, + "mean": 0.00042033716454170644, + "std": 0.042750339955091476, + "sparsity": 0.0, + "shape": [ + 1024, + 300 + ] + }, + "transformer.input_embed.proj.bias": { + "min": -0.2222987860441208, + "max": 0.20967179536819458, + "mean": -0.00449405936524272, + "std": 0.04091016948223114, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { + "min": -0.4279242753982544, + "max": 0.47530120611190796, + "mean": 2.540943796702777e-06, + "std": 0.024509120732545853, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { + "min": -0.32545235753059387, + "max": 0.15698140859603882, + "mean": -0.0467013455927372, + "std": 0.051578979939222336, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { + "min": -0.41039708256721497, + "max": 0.3545180857181549, + "mean": -0.00012633543519768864, + "std": 0.023601215332746506, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { + "min": -0.2297646850347519, + "max": 0.26262199878692627, + "mean": -0.029148615896701813, + "std": 0.049347542226314545, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.1.g": { + "min": 0.2546185553073883, + "max": 0.8200821876525879, + "mean": 0.5254418849945068, + "std": 0.08080805093050003, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_q.weight": { + "min": -0.29693102836608887, + "max": 0.26530489325523376, + "mean": -0.00042408728040754795, + "std": 0.032104212790727615, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_q.bias": { + "min": -0.09274528920650482, + "max": 0.12482056021690369, + "mean": 0.0006486810743808746, + "std": 0.025742707774043083, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_k.weight": { + "min": -0.29047587513923645, + "max": 0.28141430020332336, + "mean": -7.6991505920887e-05, + "std": 0.03093625046312809, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_k.bias": { + "min": -5.899471282958984, + "max": 5.8142476081848145, + "mean": -0.009332108311355114, + "std": 1.2954597473144531, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_v.weight": { + "min": -0.42482444643974304, + "max": 0.34377753734588623, + "mean": 9.762628906173632e-05, + "std": 0.02995302341878414, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_v.bias": { + "min": -0.028968220576643944, + "max": 0.027649197727441788, + "mean": -0.0003115592699032277, + "std": 0.012572345323860645, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.weight": { + "min": -0.45394477248191833, + "max": 0.44869503378868103, + "mean": 2.2737156541552395e-05, + "std": 0.023855075240135193, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.bias": { + "min": -0.08868509531021118, + "max": 0.0911499559879303, + "mean": 0.002273137215524912, + "std": 0.019512129947543144, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.3.g": { + "min": 0.2666190564632416, + "max": 1.0562766790390015, + "mean": 0.531130313873291, + "std": 0.1044141948223114, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.weight": { + "min": -0.5744591355323792, + "max": 0.6083897948265076, + "mean": -0.00043104952783323824, + "std": 0.03859502077102661, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.bias": { + "min": -0.1818137913942337, + "max": 0.045760128647089005, + "mean": -0.029441693797707558, + "std": 0.042590487748384476, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.0.4.ff.2.weight": { + "min": -1.166682481765747, + "max": 1.634623646736145, + "mean": 0.0003185438981745392, + "std": 0.02769385650753975, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.0.4.ff.2.bias": { + "min": -0.16253960132598877, + "max": 0.2057240754365921, + "mean": -0.021116681396961212, + "std": 0.027940358966588974, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.1.g": { + "min": 0.2244873046875, + "max": 0.8436590433120728, + "mean": 0.48752978444099426, + "std": 0.07519952952861786, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_q.weight": { + "min": -0.25530415773391724, + "max": 0.3058406710624695, + "mean": -9.383336873725057e-06, + "std": 0.03347048535943031, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_q.bias": { + "min": -0.09549209475517273, + "max": 0.11042480170726776, + "mean": 5.650718230754137e-05, + "std": 0.02698545530438423, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_k.weight": { + "min": -0.2974618077278137, + "max": 0.295981764793396, + "mean": 5.020356547902338e-05, + "std": 0.03253836929798126, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_k.bias": { + "min": -5.164300918579102, + "max": 5.084524154663086, + "mean": -0.0145945493131876, + "std": 1.1573816537857056, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_v.weight": { + "min": -0.3448942005634308, + "max": 0.3434945046901703, + "mean": 7.886815001256764e-05, + "std": 0.030058231204748154, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_v.bias": { + "min": -0.036158282309770584, + "max": 0.03324951231479645, + "mean": -0.00014386117982212454, + "std": 0.013023010455071926, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.weight": { + "min": -0.31528618931770325, + "max": 0.3752082884311676, + "mean": -2.1654177544405684e-05, + "std": 0.024055516347289085, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.bias": { + "min": -0.10527443885803223, + "max": 0.12188493460416794, + "mean": -0.001954286126419902, + "std": 0.0288428645581007, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.3.g": { + "min": 0.31180328130722046, + "max": 1.120958685874939, + "mean": 0.6662410497665405, + "std": 0.09774944931268692, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.weight": { + "min": -0.872490644454956, + "max": 0.627565324306488, + "mean": 0.0016757093835622072, + "std": 0.047438349574804306, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.bias": { + "min": -0.27100008726119995, + "max": 0.03407798707485199, + "mean": -0.04660271108150482, + "std": 0.04059542715549469, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.1.4.ff.2.weight": { + "min": -0.9202945232391357, + "max": 0.9643993973731995, + "mean": 0.0010207913583144546, + "std": 0.04070187732577324, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.1.4.ff.2.bias": { + "min": -0.14455102384090424, + "max": 0.07482050359249115, + "mean": -0.009084243327379227, + "std": 0.025694938376545906, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.1.g": { + "min": 0.23976297676563263, + "max": 0.7124081254005432, + "mean": 0.4472041726112366, + "std": 0.05932378023862839, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_q.weight": { + "min": -0.27300503849983215, + "max": 0.297477126121521, + "mean": 8.662666004966013e-06, + "std": 0.035474397242069244, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_q.bias": { + "min": -0.11903306841850281, + "max": 0.11846816539764404, + "mean": 0.0007502126973122358, + "std": 0.02760804258286953, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_k.weight": { + "min": -0.28101953864097595, + "max": 0.27942612767219543, + "mean": -7.648450991837308e-05, + "std": 0.03510245680809021, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_k.bias": { + "min": -2.5096001625061035, + "max": 2.5215961933135986, + "mean": 0.026745397597551346, + "std": 0.586780309677124, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_v.weight": { + "min": -0.22110240161418915, + "max": 0.27161508798599243, + "mean": 2.438401679683011e-06, + "std": 0.030731581151485443, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_v.bias": { + "min": -0.033151235431432724, + "max": 0.031146494671702385, + "mean": 0.00011706411896739155, + "std": 0.012394252233207226, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.weight": { + "min": -0.23539645969867706, + "max": 0.23185278475284576, + "mean": 5.7256078434875235e-05, + "std": 0.025697633624076843, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.bias": { + "min": -0.13603141903877258, + "max": 0.1280086189508438, + "mean": -0.005497735925018787, + "std": 0.03996264934539795, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.3.g": { + "min": 0.3547299802303314, + "max": 1.1723523139953613, + "mean": 0.7105399370193481, + "std": 0.10377444326877594, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.weight": { + "min": -0.6173876523971558, + "max": 0.5556272268295288, + "mean": 0.001160334562882781, + "std": 0.046114034950733185, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.bias": { + "min": -0.18945953249931335, + "max": 0.024937259033322334, + "mean": -0.034846723079681396, + "std": 0.028622858226299286, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.2.4.ff.2.weight": { + "min": -1.1309547424316406, + "max": 0.97038733959198, + "mean": 0.00035909086000174284, + "std": 0.04234256222844124, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.2.4.ff.2.bias": { + "min": -0.5978560447692871, + "max": 0.06273925304412842, + "mean": -0.0048814816400408745, + "std": 0.028621360659599304, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.1.g": { + "min": 0.3753381073474884, + "max": 0.9404851794242859, + "mean": 0.592466413974762, + "std": 0.06694933772087097, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_q.weight": { + "min": -0.3917763829231262, + "max": 0.36936038732528687, + "mean": 7.001425547059625e-05, + "std": 0.0371866449713707, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_q.bias": { + "min": -0.11900075525045395, + "max": 0.13653883337974548, + "mean": 0.0009160788613371551, + "std": 0.029187612235546112, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_k.weight": { + "min": -0.6190802454948425, + "max": 0.508792519569397, + "mean": 1.5223037735268008e-05, + "std": 0.036439377814531326, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_k.bias": { + "min": -8.18681526184082, + "max": 8.788924217224121, + "mean": -0.10927566885948181, + "std": 1.6988582611083984, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_v.weight": { + "min": -0.27652865648269653, + "max": 0.2397209107875824, + "mean": 5.228666486800648e-05, + "std": 0.03261314332485199, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_v.bias": { + "min": -0.0514988899230957, + "max": 0.03946297615766525, + "mean": 9.359161776956171e-05, + "std": 0.012969369068741798, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.weight": { + "min": -0.23075971007347107, + "max": 0.23487111926078796, + "mean": -2.203527037636377e-05, + "std": 0.029389776289463043, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.bias": { + "min": -0.20423616468906403, + "max": 0.1052512601017952, + "mean": -0.004020487889647484, + "std": 0.03263992816209793, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.3.g": { + "min": 0.33965712785720825, + "max": 1.012444019317627, + "mean": 0.7007054090499878, + "std": 0.09675901383161545, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.weight": { + "min": -0.5647616982460022, + "max": 0.8335906267166138, + "mean": 0.0004150677123107016, + "std": 0.04229460284113884, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.bias": { + "min": -0.21212875843048096, + "max": 0.029963094741106033, + "mean": -0.03217349201440811, + "std": 0.026498712599277496, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.3.4.ff.2.weight": { + "min": -0.7548851370811462, + "max": 0.719126284122467, + "mean": -1.581827746122144e-05, + "std": 0.036835212260484695, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.3.4.ff.2.bias": { + "min": -0.2634251117706299, + "max": 0.1063019409775734, + "mean": -0.0030143139883875847, + "std": 0.028873277828097343, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.1.g": { + "min": 0.28394702076911926, + "max": 0.6950414180755615, + "mean": 0.4993884563446045, + "std": 0.04653454199433327, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_q.weight": { + "min": -0.2782432436943054, + "max": 0.2338251918554306, + "mean": -0.00011091169290011749, + "std": 0.03875752165913582, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_q.bias": { + "min": -0.15358327329158783, + "max": 0.12643983960151672, + "mean": -0.0022276192903518677, + "std": 0.033326249569654465, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_k.weight": { + "min": -0.41438740491867065, + "max": 0.6594708561897278, + "mean": -1.851528577390127e-05, + "std": 0.039096731692552567, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_k.bias": { + "min": -4.237917423248291, + "max": 4.722480773925781, + "mean": -0.020456865429878235, + "std": 1.0076923370361328, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_v.weight": { + "min": -0.245052769780159, + "max": 0.20759740471839905, + "mean": 4.428692045621574e-05, + "std": 0.0339626781642437, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_v.bias": { + "min": -0.034463901072740555, + "max": 0.04485860466957092, + "mean": -2.209081139881164e-05, + "std": 0.012639513239264488, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.weight": { + "min": -0.2011018991470337, + "max": 0.20644338428974152, + "mean": -2.9357790481299162e-05, + "std": 0.03102092258632183, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.bias": { + "min": -0.19982746243476868, + "max": 0.11318917572498322, + "mean": -0.0028952043503522873, + "std": 0.03453591465950012, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.3.g": { + "min": 0.36675214767456055, + "max": 1.0576648712158203, + "mean": 0.6704948544502258, + "std": 0.06640778481960297, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.weight": { + "min": -0.39844217896461487, + "max": 0.5021068453788757, + "mean": -3.8750327803427354e-05, + "std": 0.04113020375370979, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.bias": { + "min": -0.12863779067993164, + "max": 0.026958497241139412, + "mean": -0.030533233657479286, + "std": 0.02188229374587536, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.4.4.ff.2.weight": { + "min": -0.449487566947937, + "max": 0.43325698375701904, + "mean": 7.53812346374616e-05, + "std": 0.03489059582352638, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.4.4.ff.2.bias": { + "min": -0.2675015926361084, + "max": 0.07307843118906021, + "mean": -0.0010904058581218123, + "std": 0.02313595451414585, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.1.g": { + "min": 0.28754422068595886, + "max": 0.6852768659591675, + "mean": 0.5245310068130493, + "std": 0.04753505066037178, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_q.weight": { + "min": -0.22266238927841187, + "max": 0.22331833839416504, + "mean": 1.5918290955596603e-05, + "std": 0.038949232548475266, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_q.bias": { + "min": -0.13635052740573883, + "max": 0.10933808237314224, + "mean": 0.00024784280685707927, + "std": 0.029207777231931686, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_k.weight": { + "min": -0.37493839859962463, + "max": 0.43759685754776, + "mean": -9.403542208019644e-06, + "std": 0.03928738459944725, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_k.bias": { + "min": -3.8458573818206787, + "max": 4.999326705932617, + "mean": 0.009741819463670254, + "std": 0.8452204465866089, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_v.weight": { + "min": -0.22270528972148895, + "max": 0.22029587626457214, + "mean": -3.1911031328490935e-07, + "std": 0.034410301595926285, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_v.bias": { + "min": -0.043785978108644485, + "max": 0.03592836111783981, + "mean": -0.0002596271806396544, + "std": 0.012078739702701569, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.weight": { + "min": -0.21270592510700226, + "max": 0.18842868506908417, + "mean": -1.7000973457470536e-05, + "std": 0.03153671696782112, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.bias": { + "min": -0.1809375286102295, + "max": 0.12074985355138779, + "mean": -0.002395304851233959, + "std": 0.04127994924783707, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.3.g": { + "min": 0.422917902469635, + "max": 0.9417884349822998, + "mean": 0.6626536250114441, + "std": 0.05681688338518143, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.weight": { + "min": -0.3708776533603668, + "max": 0.4765470623970032, + "mean": -8.20929926703684e-05, + "std": 0.04088940471410751, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.bias": { + "min": -0.20849654078483582, + "max": 0.0273736622184515, + "mean": -0.03023475781083107, + "std": 0.021363815292716026, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.5.4.ff.2.weight": { + "min": -0.3406715989112854, + "max": 0.7341561913490295, + "mean": 8.243846968980506e-05, + "std": 0.03476623818278313, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.5.4.ff.2.bias": { + "min": -0.24016188085079193, + "max": 0.05046152323484421, + "mean": -0.0011865879641845822, + "std": 0.020459504798054695, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.1.g": { + "min": 0.30588385462760925, + "max": 0.6534701585769653, + "mean": 0.5251248478889465, + "std": 0.04612228646874428, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_q.weight": { + "min": -0.30431559681892395, + "max": 0.21719232201576233, + "mean": 6.998516619205475e-05, + "std": 0.039497170597314835, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_q.bias": { + "min": -0.14912384748458862, + "max": 0.13098323345184326, + "mean": 0.0003266759740654379, + "std": 0.03045588731765747, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_k.weight": { + "min": -0.25694772601127625, + "max": 0.201896533370018, + "mean": 3.129036849713884e-05, + "std": 0.0394882932305336, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_k.bias": { + "min": -2.336271047592163, + "max": 2.375894784927368, + "mean": -0.026241114363074303, + "std": 0.44977155327796936, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_v.weight": { + "min": -0.18857863545417786, + "max": 0.21028850972652435, + "mean": 3.711117460625246e-05, + "std": 0.034793779253959656, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_v.bias": { + "min": -0.03168531507253647, + "max": 0.03566686809062958, + "mean": -0.00019767877529375255, + "std": 0.012288626283407211, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.weight": { + "min": -0.18829987943172455, + "max": 0.17024517059326172, + "mean": -6.836466491222382e-05, + "std": 0.03217046335339546, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.bias": { + "min": -0.13942238688468933, + "max": 0.1372329592704773, + "mean": -0.002514950931072235, + "std": 0.05129847675561905, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.3.g": { + "min": 0.4670739769935608, + "max": 0.955595850944519, + "mean": 0.6688634157180786, + "std": 0.05277201533317566, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.weight": { + "min": -0.3244037926197052, + "max": 0.309257835149765, + "mean": -1.045628778229002e-06, + "std": 0.04094540327787399, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.bias": { + "min": -0.1248614490032196, + "max": 0.025666970759630203, + "mean": -0.030689720064401627, + "std": 0.019823001697659492, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.6.4.ff.2.weight": { + "min": -0.43948638439178467, + "max": 0.44534069299697876, + "mean": 9.591381240170449e-05, + "std": 0.035119153559207916, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.6.4.ff.2.bias": { + "min": -0.2246266007423401, + "max": 0.051820773631334305, + "mean": -0.0011818428756669164, + "std": 0.018466750159859657, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.1.g": { + "min": 0.33914706110954285, + "max": 0.7398536205291748, + "mean": 0.5587007999420166, + "std": 0.04139573872089386, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_q.weight": { + "min": -0.2729904353618622, + "max": 0.27884039282798767, + "mean": 2.0351768398541026e-05, + "std": 0.04105766862630844, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_q.bias": { + "min": -0.13680818676948547, + "max": 0.13977055251598358, + "mean": 0.0004918644553981721, + "std": 0.02663181535899639, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_k.weight": { + "min": -0.49051523208618164, + "max": 0.35575586557388306, + "mean": 8.911330223781988e-05, + "std": 0.04069535806775093, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_k.bias": { + "min": -2.2970781326293945, + "max": 1.745163917541504, + "mean": -0.021079789847135544, + "std": 0.500128984451294, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_v.weight": { + "min": -0.2181096374988556, + "max": 0.1974443644285202, + "mean": -4.0170674765249714e-05, + "std": 0.03423338383436203, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_v.bias": { + "min": -0.041142482310533524, + "max": 0.03885917738080025, + "mean": -0.0001360031747026369, + "std": 0.012883774936199188, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.weight": { + "min": -0.17761866748332977, + "max": 0.1828862875699997, + "mean": 4.801471368409693e-05, + "std": 0.03155674412846565, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.bias": { + "min": -0.1799207329750061, + "max": 0.18389682471752167, + "mean": -0.0022146617993712425, + "std": 0.05482979863882065, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.3.g": { + "min": 0.474190354347229, + "max": 1.0258487462997437, + "mean": 0.6452326774597168, + "std": 0.05035318806767464, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.weight": { + "min": -0.27163514494895935, + "max": 0.3091295659542084, + "mean": 0.00011244519555475563, + "std": 0.04068158566951752, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.bias": { + "min": -0.10526852309703827, + "max": 0.026741184294223785, + "mean": -0.029519207775592804, + "std": 0.01793486438691616, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.7.4.ff.2.weight": { + "min": -0.33932313323020935, + "max": 0.329169899225235, + "mean": 5.2667885029222816e-05, + "std": 0.03441279008984566, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.7.4.ff.2.bias": { + "min": -0.18180307745933533, + "max": 0.042509548366069794, + "mean": -0.0010597179643809795, + "std": 0.017209293320775032, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.1.g": { + "min": 0.32517459988594055, + "max": 0.6865665912628174, + "mean": 0.511164128780365, + "std": 0.03695276752114296, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_q.weight": { + "min": -0.23393133282661438, + "max": 0.2253761738538742, + "mean": -3.613880107877776e-05, + "std": 0.039175428450107574, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_q.bias": { + "min": -0.11511484533548355, + "max": 0.13181191682815552, + "mean": 0.00015029555652290583, + "std": 0.029160132631659508, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_k.weight": { + "min": -0.35229772329330444, + "max": 0.28487107157707214, + "mean": 6.5603690018178895e-06, + "std": 0.03924452140927315, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_k.bias": { + "min": -4.132349967956543, + "max": 3.543774366378784, + "mean": -0.011590607464313507, + "std": 0.6826151609420776, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_v.weight": { + "min": -0.21073584258556366, + "max": 0.20936711132526398, + "mean": 3.4690663596848026e-05, + "std": 0.03448447957634926, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_v.bias": { + "min": -0.03585724160075188, + "max": 0.047966208308935165, + "mean": 0.0007884915685281157, + "std": 0.012871142476797104, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.weight": { + "min": -0.21028311550617218, + "max": 0.19305972754955292, + "mean": -9.823215805226937e-07, + "std": 0.031695324927568436, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.bias": { + "min": -0.1864088624715805, + "max": 0.17721442878246307, + "mean": -0.0028417375870049, + "std": 0.058615218847990036, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.3.g": { + "min": 0.47462186217308044, + "max": 1.0414687395095825, + "mean": 0.651329517364502, + "std": 0.049656689167022705, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.weight": { + "min": -0.24834677577018738, + "max": 0.3290989398956299, + "mean": 0.00018076221749652177, + "std": 0.04056994616985321, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.bias": { + "min": -0.12541481852531433, + "max": 0.024957137182354927, + "mean": -0.030498644337058067, + "std": 0.017614001408219337, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.8.4.ff.2.weight": { + "min": -0.4203978180885315, + "max": 0.4814401865005493, + "mean": 1.1958536560996436e-06, + "std": 0.03539701923727989, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.8.4.ff.2.bias": { + "min": -0.15133719146251678, + "max": 0.04343123733997345, + "mean": 4.256972897564992e-05, + "std": 0.014886128716170788, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.1.g": { + "min": 0.31556373834609985, + "max": 0.6816186308860779, + "mean": 0.5528932809829712, + "std": 0.04069383069872856, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_q.weight": { + "min": -0.20636020600795746, + "max": 0.21985411643981934, + "mean": 3.188779010088183e-05, + "std": 0.03829942271113396, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_q.bias": { + "min": -0.13772568106651306, + "max": 0.1125853881239891, + "mean": 2.6155808882322162e-05, + "std": 0.025809435173869133, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_k.weight": { + "min": -0.40282922983169556, + "max": 0.37083154916763306, + "mean": 2.5528193873469718e-05, + "std": 0.03817952424287796, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_k.bias": { + "min": -3.7708845138549805, + "max": 2.868703603744507, + "mean": 0.0011554225347936153, + "std": 0.5168288946151733, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_v.weight": { + "min": -0.20372195541858673, + "max": 0.1975945085287094, + "mean": 2.9724978958256543e-05, + "std": 0.03429732471704483, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_v.bias": { + "min": -0.0505308173596859, + "max": 0.039880186319351196, + "mean": -0.0004213028587400913, + "std": 0.01341495756059885, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.weight": { + "min": -0.19602739810943604, + "max": 0.20172414183616638, + "mean": -1.2448943380150013e-05, + "std": 0.031805410981178284, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.bias": { + "min": -0.19294138252735138, + "max": 0.19508768618106842, + "mean": -0.0029671685770154, + "std": 0.06252522766590118, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.3.g": { + "min": 0.348909467458725, + "max": 1.083768367767334, + "mean": 0.667101263999939, + "std": 0.055243175476789474, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.weight": { + "min": -0.22561651468276978, + "max": 0.2514271140098572, + "mean": 0.0003585518861655146, + "std": 0.04075947403907776, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.bias": { + "min": -0.09107004851102829, + "max": 0.04363898187875748, + "mean": -0.03007982112467289, + "std": 0.017611678689718246, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.9.4.ff.2.weight": { + "min": -0.353363573551178, + "max": 0.3039560914039612, + "mean": -4.4702926970785484e-05, + "std": 0.037122584879398346, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.9.4.ff.2.bias": { + "min": -0.16167114675045013, + "max": 0.06346774101257324, + "mean": -7.894223381299525e-05, + "std": 0.019427189603447914, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.1.g": { + "min": 0.34871092438697815, + "max": 0.7219411134719849, + "mean": 0.5423486828804016, + "std": 0.03906320407986641, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_q.weight": { + "min": -0.219291090965271, + "max": 0.22339218854904175, + "mean": -1.1523573448357638e-05, + "std": 0.03923090174794197, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_q.bias": { + "min": -0.118381567299366, + "max": 0.17055465281009674, + "mean": 0.00028248116723261774, + "std": 0.025117389857769012, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_k.weight": { + "min": -0.24647831916809082, + "max": 0.30066463351249695, + "mean": -3.701161767821759e-05, + "std": 0.03893034905195236, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_k.bias": { + "min": -3.5050106048583984, + "max": 3.714456796646118, + "mean": 0.015847081318497658, + "std": 0.7823866009712219, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_v.weight": { + "min": -0.2191196233034134, + "max": 0.2373991161584854, + "mean": -1.3136124835000373e-05, + "std": 0.03630338981747627, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_v.bias": { + "min": -0.04720474034547806, + "max": 0.051363855600357056, + "mean": 0.00048070820048451424, + "std": 0.013523152098059654, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.weight": { + "min": -0.21417121589183807, + "max": 0.21722286939620972, + "mean": 5.63644825888332e-05, + "std": 0.0336158350110054, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.bias": { + "min": -0.21132777631282806, + "max": 0.2312006652355194, + "mean": -0.0050989487208426, + "std": 0.06185900419950485, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.3.g": { + "min": 0.36193206906318665, + "max": 1.1010645627975464, + "mean": 0.6992560029029846, + "std": 0.05359357222914696, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.weight": { + "min": -0.2351117730140686, + "max": 0.24475757777690887, + "mean": 0.00046337785897776484, + "std": 0.041268885135650635, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.bias": { + "min": -0.09809085726737976, + "max": 0.06809623539447784, + "mean": -0.0314301960170269, + "std": 0.018128085881471634, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.10.4.ff.2.weight": { + "min": -0.30171892046928406, + "max": 0.35163986682891846, + "mean": -8.267226803582162e-05, + "std": 0.04027453064918518, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.10.4.ff.2.bias": { + "min": -0.1522630751132965, + "max": 0.14965395629405975, + "mean": 0.0002633024996612221, + "std": 0.023038938641548157, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.1.g": { + "min": 0.9992594122886658, + "max": 1.0015419721603394, + "mean": 1.0000762939453125, + "std": 0.0006376681849360466, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_q.weight": { + "min": -0.03125917166471481, + "max": 0.03125542029738426, + "mean": -1.929077916429378e-05, + "std": 0.018040984869003296, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_q.bias": { + "min": -0.031228363513946533, + "max": 0.030987966805696487, + "mean": -0.0010841633193194866, + "std": 0.017950600013136864, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_k.weight": { + "min": -0.03125608712434769, + "max": 0.03125986456871033, + "mean": 3.548163931554882e-06, + "std": 0.018041392788290977, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_k.bias": { + "min": -0.03115428239107132, + "max": 0.031174642965197563, + "mean": 0.00033392058685421944, + "std": 0.01806280016899109, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.bias": { + "min": -0.0006233988679014146, + "max": 0.0007061311043798923, + "mean": 4.538033408607589e-06, + "std": 0.0001893796434160322, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.3.g": { + "min": 0.997599720954895, + "max": 1.002988576889038, + "mean": 0.9999969601631165, + "std": 0.000850954616907984, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.weight": { + "min": -0.033545054495334625, + "max": 0.033692505210638046, + "mean": -6.091411705710925e-06, + "std": 0.018047811463475227, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.bias": { + "min": -0.033063653856515884, + "max": 0.033412136137485504, + "mean": -0.00018106887000612915, + "std": 0.017954090610146523, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.11.4.ff.2.weight": { + "min": -0.001468250178731978, + "max": 0.0015634398441761732, + "mean": 1.9080666788795497e-06, + "std": 0.00028948785620741546, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.11.4.ff.2.bias": { + "min": -0.0005752606084570289, + "max": 0.0007690406637266278, + "mean": 7.6006986091670115e-06, + "std": 0.00017151834617834538, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.1.g": { + "min": 0.3833079934120178, + "max": 0.7191449403762817, + "mean": 0.5806841254234314, + "std": 0.03885476291179657, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_q.weight": { + "min": -0.23893281817436218, + "max": 0.19658899307250977, + "mean": 2.609232979011722e-05, + "std": 0.03746626526117325, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_q.bias": { + "min": -0.11880965530872345, + "max": 0.1667701154947281, + "mean": 0.000981115852482617, + "std": 0.02755648083984852, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_k.weight": { + "min": -0.2465641349554062, + "max": 0.49993160367012024, + "mean": -5.0439630285836756e-05, + "std": 0.03762364014983177, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_k.bias": { + "min": -3.9418535232543945, + "max": 3.7689952850341797, + "mean": -0.003572138026356697, + "std": 0.6813418865203857, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_v.weight": { + "min": -0.2274625599384308, + "max": 0.25183549523353577, + "mean": -1.1858754987770226e-05, + "std": 0.03743482381105423, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_v.bias": { + "min": -0.07157625257968903, + "max": 0.08059139549732208, + "mean": -0.0005097019020467997, + "std": 0.0156550370156765, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.weight": { + "min": -0.22814570367336273, + "max": 0.2576799690723419, + "mean": -2.8758266125805676e-05, + "std": 0.03542165458202362, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.bias": { + "min": -0.20052188634872437, + "max": 0.21483485400676727, + "mean": -0.0055272276513278484, + "std": 0.06832942366600037, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.3.g": { + "min": 0.40502721071243286, + "max": 1.189380407333374, + "mean": 0.7378897666931152, + "std": 0.05522923544049263, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.weight": { + "min": -0.22088685631752014, + "max": 0.2456110566854477, + "mean": 0.0005211912211962044, + "std": 0.04133584350347519, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.bias": { + "min": -0.10322928428649902, + "max": 0.024186961352825165, + "mean": -0.03266708552837372, + "std": 0.018890798091888428, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.12.4.ff.2.weight": { + "min": -0.44966569542884827, + "max": 0.42246878147125244, + "mean": -0.00043506931979209185, + "std": 0.04689610004425049, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.12.4.ff.2.bias": { + "min": -0.2515268921852112, + "max": 0.47013524174690247, + "mean": 0.003204584587365389, + "std": 0.04452726989984512, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.0.weight": { + "min": -0.31688186526298523, + "max": 0.33314481377601624, + "mean": -2.5167657440761104e-05, + "std": 0.02128784917294979, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.13.1.g": { + "min": 0.3244757652282715, + "max": 0.6856456398963928, + "mean": 0.5710105299949646, + "std": 0.044706691056489944, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_q.weight": { + "min": -0.16456718742847443, + "max": 0.17448973655700684, + "mean": -4.871570490649901e-05, + "std": 0.03318251296877861, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_q.bias": { + "min": -0.18692979216575623, + "max": 0.14325818419456482, + "mean": 3.459470462985337e-05, + "std": 0.029701216146349907, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_k.weight": { + "min": -0.38104259967803955, + "max": 0.2459549903869629, + "mean": -9.848581612459384e-06, + "std": 0.03276371210813522, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_k.bias": { + "min": -3.655487537384033, + "max": 3.2897744178771973, + "mean": -0.01425144076347351, + "std": 0.985081136226654, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_v.weight": { + "min": -0.23475398123264313, + "max": 0.24735963344573975, + "mean": -1.814730239857454e-05, + "std": 0.041698258370161057, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_v.bias": { + "min": -0.07251452654600143, + "max": 0.15445762872695923, + "mean": 0.0006656228797510266, + "std": 0.0251647736877203, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.weight": { + "min": -0.26630881428718567, + "max": 0.2481267750263214, + "mean": -1.5170076949289069e-05, + "std": 0.0401393324136734, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.bias": { + "min": -0.1895921230316162, + "max": 0.19462409615516663, + "mean": -0.001237674499861896, + "std": 0.06668463349342346, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.3.g": { + "min": 0.32920053601264954, + "max": 0.999627411365509, + "mean": 0.7191565632820129, + "std": 0.052332233637571335, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.weight": { + "min": -0.23170752823352814, + "max": 0.24531398713588715, + "mean": 0.00018265214748680592, + "std": 0.040900230407714844, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.bias": { + "min": -0.11451739817857742, + "max": 0.019039874896407127, + "mean": -0.0424770824611187, + "std": 0.018864724785089493, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.13.4.ff.2.weight": { + "min": -0.38964730501174927, + "max": 0.40745288133621216, + "mean": -2.1833995560882613e-05, + "std": 0.0485333576798439, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.13.4.ff.2.bias": { + "min": -0.6929526925086975, + "max": 0.4126836955547333, + "mean": 0.0008477572700940073, + "std": 0.060282669961452484, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.0.weight": { + "min": -0.0013933395966887474, + "max": 1.000746726989746, + "mean": 0.00048820103984326124, + "std": 0.022089513018727303, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.14.1.g": { + "min": 0.9992843866348267, + "max": 1.001552939414978, + "mean": 1.0000746250152588, + "std": 0.0006248687277548015, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_q.weight": { + "min": -0.03125389292836189, + "max": 0.03125779330730438, + "mean": -2.1020408894401044e-05, + "std": 0.01803232543170452, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_q.bias": { + "min": -0.031215354800224304, + "max": 0.031232187524437904, + "mean": -0.0006770011968910694, + "std": 0.017826862633228302, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_k.weight": { + "min": -0.03125695139169693, + "max": 0.03126237541437149, + "mean": -8.831485502014402e-06, + "std": 0.018031351268291473, + "sparsity": 9.5367431640625e-07, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_k.bias": { + "min": -0.03123210370540619, + "max": 0.03124479576945305, + "mean": -0.0007297537522390485, + "std": 0.017941787838935852, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.bias": { + "min": -0.0005147741758264601, + "max": 0.00041916739428415895, + "mean": -4.1531684473739006e-06, + "std": 0.0001558788208058104, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.3.g": { + "min": 0.997329831123352, + "max": 1.0023579597473145, + "mean": 0.9995578527450562, + "std": 0.0008328193798661232, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.weight": { + "min": -0.033257633447647095, + "max": 0.03283705189824104, + "mean": -2.9398686365311733e-06, + "std": 0.01802799478173256, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.bias": { + "min": -0.0324481800198555, + "max": 0.03130009397864342, + "mean": -0.000511951744556427, + "std": 0.01803583651781082, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.14.4.ff.2.weight": { + "min": -0.0017112370114773512, + "max": 0.0015153783606365323, + "mean": -1.2167475915703108e-06, + "std": 0.00028721734997816384, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.14.4.ff.2.bias": { + "min": -0.00046955313882790506, + "max": 0.0003882118908222765, + "mean": -3.8059165490267333e-06, + "std": 0.00014281016774475574, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.0.weight": { + "min": -0.23431308567523956, + "max": 0.2725020945072174, + "mean": 6.621908141823951e-06, + "std": 0.018810350447893143, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.15.1.g": { + "min": 0.32144924998283386, + "max": 0.6939579248428345, + "mean": 0.5816149711608887, + "std": 0.045937687158584595, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_q.weight": { + "min": -0.18192073702812195, + "max": 0.1977624148130417, + "mean": -1.1576559700188227e-05, + "std": 0.03318417817354202, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_q.bias": { + "min": -0.16049131751060486, + "max": 0.1293114274740219, + "mean": -0.00107291666790843, + "std": 0.03413516655564308, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_k.weight": { + "min": -0.3323962688446045, + "max": 0.31116873025894165, + "mean": -1.0262579962727614e-05, + "std": 0.03223471716046333, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_k.bias": { + "min": -7.802563190460205, + "max": 8.761749267578125, + "mean": 0.09345458447933197, + "std": 1.6194684505462646, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_v.weight": { + "min": -0.23397405445575714, + "max": 0.2418195903301239, + "mean": 4.162176628597081e-05, + "std": 0.04085618257522583, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_v.bias": { + "min": -0.07595669478178024, + "max": 0.0657576471567154, + "mean": 0.00048221880570054054, + "std": 0.019416553899645805, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.weight": { + "min": -0.2459147870540619, + "max": 0.23389238119125366, + "mean": -3.2510670280316845e-06, + "std": 0.03943093866109848, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.bias": { + "min": -0.1629837304353714, + "max": 0.16088047623634338, + "mean": 0.0016233830247074366, + "std": 0.06528986245393753, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.3.g": { + "min": 0.5571612119674683, + "max": 0.9436106085777283, + "mean": 0.7128171324729919, + "std": 0.04012364149093628, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.weight": { + "min": -0.22801116108894348, + "max": 0.2548006474971771, + "mean": -4.5571337977889925e-05, + "std": 0.04057438299059868, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.bias": { + "min": -0.13471974432468414, + "max": 0.0221097432076931, + "mean": -0.041352279484272, + "std": 0.01838749460875988, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.15.4.ff.2.weight": { + "min": -0.42162591218948364, + "max": 0.3923877477645874, + "mean": -4.321471351431683e-06, + "std": 0.04778357967734337, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.15.4.ff.2.bias": { + "min": -0.6071884632110596, + "max": 0.651282787322998, + "mean": 0.0015848546754568815, + "std": 0.0568372942507267, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.0.weight": { + "min": -0.25181877613067627, + "max": 0.32084232568740845, + "mean": -6.161948476801626e-06, + "std": 0.019613562151789665, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.16.1.g": { + "min": 0.35955217480659485, + "max": 0.6821547150611877, + "mean": 0.5706839561462402, + "std": 0.0429888591170311, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_q.weight": { + "min": -0.22016532719135284, + "max": 0.17702604830265045, + "mean": -3.4450480598025024e-05, + "std": 0.034298721700906754, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_q.bias": { + "min": -0.1631413698196411, + "max": 0.23277200758457184, + "mean": 0.000363422412192449, + "std": 0.032813675701618195, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_k.weight": { + "min": -0.2639073431491852, + "max": 0.2398279309272766, + "mean": -5.2961986511945724e-05, + "std": 0.033897411078214645, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_k.bias": { + "min": -4.854308605194092, + "max": 5.090536117553711, + "mean": 0.04387902468442917, + "std": 1.2290979623794556, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_v.weight": { + "min": -0.24643683433532715, + "max": 0.2503347098827362, + "mean": 7.216692029032856e-05, + "std": 0.04398633539676666, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_v.bias": { + "min": -0.06248769536614418, + "max": 0.05441384017467499, + "mean": 0.0006457050913013518, + "std": 0.017188573256134987, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.weight": { + "min": -0.2864288091659546, + "max": 0.2721114456653595, + "mean": -5.008514563087374e-05, + "std": 0.04298446327447891, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.bias": { + "min": -0.16100545227527618, + "max": 0.170342355966568, + "mean": -0.0028870203532278538, + "std": 0.059300076216459274, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.3.g": { + "min": 0.5198097229003906, + "max": 0.9330063462257385, + "mean": 0.7133984565734863, + "std": 0.03842313215136528, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.weight": { + "min": -0.23787352442741394, + "max": 0.24874305725097656, + "mean": 0.0004645891021937132, + "std": 0.04045315086841583, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.bias": { + "min": -0.14499974250793457, + "max": 0.04109013453125954, + "mean": -0.039695803076028824, + "std": 0.020541805773973465, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.16.4.ff.2.weight": { + "min": -0.5323729515075684, + "max": 0.5824694633483887, + "mean": 5.902071279706433e-06, + "std": 0.04885893687605858, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.16.4.ff.2.bias": { + "min": -0.5189845561981201, + "max": 0.4933343231678009, + "mean": 0.0023664908949285746, + "std": 0.05344504490494728, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.0.weight": { + "min": -0.2737047076225281, + "max": 0.31558480858802795, + "mean": 1.935944737851969e-06, + "std": 0.020050112158060074, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.17.1.g": { + "min": 0.3658909797668457, + "max": 0.7117034196853638, + "mean": 0.5931328535079956, + "std": 0.04596179351210594, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_q.weight": { + "min": -0.2108193188905716, + "max": 0.1990451216697693, + "mean": 3.062548057641834e-05, + "std": 0.034867268055677414, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_q.bias": { + "min": -0.18712614476680756, + "max": 0.20343470573425293, + "mean": 0.0009520579478703439, + "std": 0.031497176736593246, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_k.weight": { + "min": -0.2896800935268402, + "max": 0.3398098945617676, + "mean": -4.6883709728717804e-05, + "std": 0.03458770364522934, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_k.bias": { + "min": -3.8768599033355713, + "max": 3.3869552612304688, + "mean": 0.014455841854214668, + "std": 0.8583106398582458, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_v.weight": { + "min": -0.22448983788490295, + "max": 0.24981370568275452, + "mean": -3.890434527420439e-06, + "std": 0.042229313403367996, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_v.bias": { + "min": -0.05526347830891609, + "max": 0.046524014323949814, + "mean": -2.1809362806379795e-05, + "std": 0.01583988219499588, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.weight": { + "min": -0.2933104932308197, + "max": 0.29035091400146484, + "mean": -7.618443305545952e-06, + "std": 0.04194440320134163, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.bias": { + "min": -0.124831423163414, + "max": 0.25899115204811096, + "mean": -0.0032436971087008715, + "std": 0.05317322164773941, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.3.g": { + "min": 0.45623838901519775, + "max": 0.844422459602356, + "mean": 0.7054718732833862, + "std": 0.03522763401269913, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.weight": { + "min": -0.5120505094528198, + "max": 0.3482021689414978, + "mean": 0.00034296896774321795, + "std": 0.04019856080412865, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.bias": { + "min": -0.18573501706123352, + "max": 0.03954247012734413, + "mean": -0.039387013763189316, + "std": 0.02136080153286457, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.17.4.ff.2.weight": { + "min": -0.543980062007904, + "max": 0.5556398034095764, + "mean": -7.12752080289647e-05, + "std": 0.050733935087919235, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.17.4.ff.2.bias": { + "min": -0.5116539001464844, + "max": 0.6641847491264343, + "mean": 0.0024422036949545145, + "std": 0.049520041793584824, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.0.weight": { + "min": -0.3325117230415344, + "max": 0.2653426229953766, + "mean": 3.3086610073951306e-06, + "std": 0.019387137144804, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.18.1.g": { + "min": 0.3219893276691437, + "max": 0.7664631009101868, + "mean": 0.6510411500930786, + "std": 0.04532777890563011, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_q.weight": { + "min": -0.2498156577348709, + "max": 0.2198626697063446, + "mean": -1.886132849904243e-06, + "std": 0.03650164604187012, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_q.bias": { + "min": -0.32695695757865906, + "max": 0.2867416441440582, + "mean": -0.000684951723087579, + "std": 0.03855687379837036, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_k.weight": { + "min": -0.31001296639442444, + "max": 0.3700636327266693, + "mean": 6.516962457681075e-05, + "std": 0.036242250353097916, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_k.bias": { + "min": -4.716774940490723, + "max": 5.807016372680664, + "mean": 0.03795425221323967, + "std": 1.4130064249038696, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_v.weight": { + "min": -0.22152067720890045, + "max": 0.20586349070072174, + "mean": -7.513246237067506e-05, + "std": 0.042484886944293976, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_v.bias": { + "min": -0.0776548758149147, + "max": 0.05150791257619858, + "mean": -0.0009258093778043985, + "std": 0.016412504017353058, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.weight": { + "min": -0.33054521679878235, + "max": 0.32925283908843994, + "mean": -4.675353011407424e-06, + "std": 0.042791180312633514, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.bias": { + "min": -0.284753680229187, + "max": 0.1120273545384407, + "mean": -0.0012038055574521422, + "std": 0.04701421782374382, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.3.g": { + "min": 0.4860539734363556, + "max": 0.8868206739425659, + "mean": 0.7373669743537903, + "std": 0.03824283927679062, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.weight": { + "min": -0.362324595451355, + "max": 0.27455514669418335, + "mean": 5.109608173370361e-05, + "std": 0.04064401239156723, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.bias": { + "min": -0.24754445254802704, + "max": 0.046375077217817307, + "mean": -0.039263028651475906, + "std": 0.02328905090689659, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.18.4.ff.2.weight": { + "min": -0.6261394023895264, + "max": 0.5965179204940796, + "mean": -5.992384103592485e-05, + "std": 0.053116101771593094, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.18.4.ff.2.bias": { + "min": -0.7094439268112183, + "max": 0.2657933533191681, + "mean": 0.000917100696824491, + "std": 0.05122515559196472, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.0.weight": { + "min": -0.3433791399002075, + "max": 0.30369648337364197, + "mean": 2.4011274035729e-07, + "std": 0.019135721027851105, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.19.1.g": { + "min": 0.34975123405456543, + "max": 0.7829355597496033, + "mean": 0.6388096809387207, + "std": 0.049248941242694855, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_q.weight": { + "min": -0.20544706284999847, + "max": 0.20679640769958496, + "mean": -5.99185805185698e-05, + "std": 0.037696123123168945, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_q.bias": { + "min": -0.2586185336112976, + "max": 0.2680370807647705, + "mean": -0.00040146420360542834, + "std": 0.04459588602185249, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_k.weight": { + "min": -0.3540765345096588, + "max": 0.3223837912082672, + "mean": -6.969309197302209e-06, + "std": 0.03720474615693092, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_k.bias": { + "min": -5.260976791381836, + "max": 4.204005241394043, + "mean": -0.026412418112158775, + "std": 1.0066431760787964, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_v.weight": { + "min": -0.23861557245254517, + "max": 0.24334679543972015, + "mean": -2.5082641514018178e-05, + "std": 0.04320957139134407, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_v.bias": { + "min": -0.06232341378927231, + "max": 0.056674133986234665, + "mean": 0.0003426429466344416, + "std": 0.01415110845118761, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.weight": { + "min": -0.43692541122436523, + "max": 0.37342891097068787, + "mean": 1.4435072444030084e-05, + "std": 0.04412085935473442, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.bias": { + "min": -0.09643299132585526, + "max": 0.17589901387691498, + "mean": -0.0006592142744921148, + "std": 0.03515716642141342, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.3.g": { + "min": 0.4216461777687073, + "max": 1.0694262981414795, + "mean": 0.7483195662498474, + "std": 0.04205932468175888, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.weight": { + "min": -0.2665816843509674, + "max": 0.2969212532043457, + "mean": -7.953966996865347e-05, + "std": 0.04080412909388542, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.bias": { + "min": -0.1857525259256363, + "max": 0.043901920318603516, + "mean": -0.036818623542785645, + "std": 0.025608688592910767, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.19.4.ff.2.weight": { + "min": -0.4569249451160431, + "max": 0.4865773022174835, + "mean": 4.3881707824766636e-05, + "std": 0.05420896038413048, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.19.4.ff.2.bias": { + "min": -0.28651300072669983, + "max": 0.5512722134590149, + "mean": -0.00088057282846421, + "std": 0.04782658815383911, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.0.weight": { + "min": -0.292865514755249, + "max": 0.32280707359313965, + "mean": 6.539526111737359e-06, + "std": 0.019969915971159935, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.20.1.g": { + "min": 0.2909410893917084, + "max": 0.7601442337036133, + "mean": 0.6508233547210693, + "std": 0.05213604494929314, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_q.weight": { + "min": -0.2434738278388977, + "max": 0.2616451680660248, + "mean": -6.040764219505945e-06, + "std": 0.03961297869682312, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_q.bias": { + "min": -0.2675459682941437, + "max": 0.1998538225889206, + "mean": -0.0008808721322566271, + "std": 0.05175367370247841, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_k.weight": { + "min": -0.2721429765224457, + "max": 0.25373363494873047, + "mean": 4.028795956401154e-06, + "std": 0.03871006891131401, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_k.bias": { + "min": -12.963708877563477, + "max": 15.945626258850098, + "mean": 0.03322511166334152, + "std": 1.988985300064087, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_v.weight": { + "min": -0.2071155309677124, + "max": 0.22583135962486267, + "mean": -7.227471360238269e-05, + "std": 0.04055366292595863, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_v.bias": { + "min": -0.06934842467308044, + "max": 0.06322810798883438, + "mean": 0.00015266213449649513, + "std": 0.01474202610552311, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.weight": { + "min": -0.46502697467803955, + "max": 0.32068270444869995, + "mean": 1.9500737835187465e-05, + "std": 0.0405886135995388, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.bias": { + "min": -0.06406750530004501, + "max": 0.1152099147439003, + "mean": 0.0011921885889023542, + "std": 0.0247051939368248, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.3.g": { + "min": 0.37462663650512695, + "max": 0.9322708249092102, + "mean": 0.7508515119552612, + "std": 0.040188200771808624, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.weight": { + "min": -0.27930018305778503, + "max": 0.2731732130050659, + "mean": -0.00016858182789292186, + "std": 0.040994688868522644, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.bias": { + "min": -0.19882012903690338, + "max": 0.05084774270653725, + "mean": -0.03202420845627785, + "std": 0.025111209601163864, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.20.4.ff.2.weight": { + "min": -0.6573402285575867, + "max": 0.5352922677993774, + "mean": -4.871936471317895e-05, + "std": 0.05284557491540909, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.20.4.ff.2.bias": { + "min": -0.1931021511554718, + "max": 0.5820591449737549, + "mean": -0.0005149454809725285, + "std": 0.04106936603784561, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.0.weight": { + "min": -0.4177095592021942, + "max": 0.37194108963012695, + "mean": 6.037503226252738e-06, + "std": 0.021621696650981903, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.21.1.g": { + "min": 0.21426498889923096, + "max": 0.7471067905426025, + "mean": 0.6495591998100281, + "std": 0.05437273159623146, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_q.weight": { + "min": -0.20954997837543488, + "max": 0.19577716290950775, + "mean": 4.0040544263320044e-05, + "std": 0.03946496173739433, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_q.bias": { + "min": -0.3292751908302307, + "max": 0.25935792922973633, + "mean": -0.003224420826882124, + "std": 0.05625506490468979, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_k.weight": { + "min": -0.2056337594985962, + "max": 0.25471389293670654, + "mean": 5.435157800093293e-05, + "std": 0.038567062467336655, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_k.bias": { + "min": -6.24283504486084, + "max": 6.9316864013671875, + "mean": 0.048334453254938126, + "std": 1.3849503993988037, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_v.weight": { + "min": -0.20960700511932373, + "max": 0.23016247153282166, + "mean": -5.2383575166459195e-06, + "std": 0.04131292924284935, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_v.bias": { + "min": -0.043877486139535904, + "max": 0.035942550748586655, + "mean": 4.677800461649895e-06, + "std": 0.012800506316125393, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.weight": { + "min": -0.39784368872642517, + "max": 0.3448275029659271, + "mean": -5.554455128731206e-05, + "std": 0.04238935187458992, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.bias": { + "min": -0.05505242943763733, + "max": 0.06286512315273285, + "mean": 0.0003699597145896405, + "std": 0.018672524020075798, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.3.g": { + "min": 0.3501029312610626, + "max": 1.0451030731201172, + "mean": 0.7893401980400085, + "std": 0.04874471575021744, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.weight": { + "min": -0.3334510326385498, + "max": 0.38586220145225525, + "mean": -0.0001694880920695141, + "std": 0.041480448096990585, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.bias": { + "min": -0.15723954141139984, + "max": 0.05913884937763214, + "mean": -0.031833715736866, + "std": 0.025140652433037758, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.21.4.ff.2.weight": { + "min": -0.6964147090911865, + "max": 0.4686952233314514, + "mean": -9.150124969892204e-05, + "std": 0.05179166793823242, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.21.4.ff.2.bias": { + "min": -0.24826228618621826, + "max": 0.32854214310646057, + "mean": -0.00024761329405009747, + "std": 0.0414327047765255, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.0.weight": { + "min": -0.2872432768344879, + "max": 0.35023465752601624, + "mean": -2.1361338440328836e-06, + "std": 0.024239059537649155, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.22.1.g": { + "min": 0.19656625390052795, + "max": 0.7792166471481323, + "mean": 0.6702941060066223, + "std": 0.058692529797554016, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_q.weight": { + "min": -0.22861525416374207, + "max": 0.23119905591011047, + "mean": -1.981826062547043e-05, + "std": 0.04044099524617195, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_q.bias": { + "min": -0.21965257823467255, + "max": 0.24067652225494385, + "mean": 0.0007787347421981394, + "std": 0.05579977110028267, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_k.weight": { + "min": -0.215622216463089, + "max": 0.22666674852371216, + "mean": -7.155455386964604e-05, + "std": 0.03937716409564018, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_k.bias": { + "min": -8.904394149780273, + "max": 9.067266464233398, + "mean": -0.001250309869647026, + "std": 1.8481073379516602, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_v.weight": { + "min": -0.2693168520927429, + "max": 0.25895655155181885, + "mean": 4.356484714662656e-05, + "std": 0.038407694548368454, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_v.bias": { + "min": -0.05762461572885513, + "max": 0.057689178735017776, + "mean": 0.00034963880898430943, + "std": 0.014724270440638065, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.weight": { + "min": -0.2649986743927002, + "max": 0.28868991136550903, + "mean": -6.175809539854527e-05, + "std": 0.039074063301086426, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.bias": { + "min": -0.043768905103206635, + "max": 0.0373171903192997, + "mean": -8.572106889914721e-05, + "std": 0.013365655206143856, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.3.g": { + "min": 0.3394976556301117, + "max": 1.0926626920700073, + "mean": 0.86370849609375, + "std": 0.06385412812232971, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.weight": { + "min": -0.42326879501342773, + "max": 0.419196218252182, + "mean": 0.00031274266075342894, + "std": 0.043502915650606155, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.bias": { + "min": -0.21476341784000397, + "max": 0.17061911523342133, + "mean": -0.029481371864676476, + "std": 0.031948987394571304, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.22.4.ff.2.weight": { + "min": -0.5996708869934082, + "max": 0.5596612691879272, + "mean": -0.00015256566985044628, + "std": 0.053446218371391296, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.22.4.ff.2.bias": { + "min": -0.17847125232219696, + "max": 0.3766724169254303, + "mean": 0.0013643248239532113, + "std": 0.037309642881155014, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.0.weight": { + "min": -0.39427170157432556, + "max": 0.3689534664154053, + "mean": 3.643418676801957e-05, + "std": 0.028621334582567215, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.23.1.g": { + "min": 0.2903065085411072, + "max": 0.826573371887207, + "mean": 0.7055738568305969, + "std": 0.06789194792509079, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_q.weight": { + "min": -0.9261522889137268, + "max": 1.0264601707458496, + "mean": -2.5637811631895602e-05, + "std": 0.047625649720430374, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_q.bias": { + "min": -0.8783160448074341, + "max": 0.8149734735488892, + "mean": -0.00031416097772307694, + "std": 0.09553803503513336, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_k.weight": { + "min": -0.2693849802017212, + "max": 0.24096263945102692, + "mean": -2.2922044081497006e-05, + "std": 0.03895637020468712, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_k.bias": { + "min": -23.73985481262207, + "max": 22.84831428527832, + "mean": -0.09187203645706177, + "std": 4.069868564605713, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_v.weight": { + "min": -0.227765753865242, + "max": 0.24508675932884216, + "mean": -2.5811230443650857e-05, + "std": 0.03863935545086861, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_v.bias": { + "min": -0.06041998043656349, + "max": 0.046056248247623444, + "mean": -0.00014605963951908052, + "std": 0.014698919840157032, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.weight": { + "min": -0.33846333622932434, + "max": 0.3745211064815521, + "mean": 7.246726454468444e-06, + "std": 0.04081542044878006, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.bias": { + "min": -0.0464671291410923, + "max": 0.1957084834575653, + "mean": 0.0002726902603171766, + "std": 0.013569602742791176, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.3.g": { + "min": 0.3744957149028778, + "max": 1.1300216913223267, + "mean": 0.8900200724601746, + "std": 0.06398579478263855, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.weight": { + "min": -0.4477945864200592, + "max": 0.5424723625183105, + "mean": 2.4591532564954832e-05, + "std": 0.04556761309504509, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.bias": { + "min": -0.22407397627830505, + "max": 0.08826831728219986, + "mean": -0.03201541677117348, + "std": 0.03776346147060394, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.23.4.ff.2.weight": { + "min": -0.7253258228302002, + "max": 0.6892617344856262, + "mean": 3.4524080547271296e-05, + "std": 0.05177822336554527, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.23.4.ff.2.bias": { + "min": -0.1745493859052658, + "max": 0.21855643391609192, + "mean": 4.002213245257735e-05, + "std": 0.0317784883081913, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.0.weight": { + "min": -0.3402628004550934, + "max": 0.37424033880233765, + "mean": 4.292904486646876e-05, + "std": 0.03414493426680565, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.24.1.g": { + "min": 0.3175790011882782, + "max": 1.2868926525115967, + "mean": 0.6014685034751892, + "std": 0.0834617167711258, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_q.weight": { + "min": -0.28334787487983704, + "max": 0.26021766662597656, + "mean": -3.078439021919621e-06, + "std": 0.03598484769463539, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_q.bias": { + "min": -0.23551659286022186, + "max": 0.20537099242210388, + "mean": 0.0002320160565432161, + "std": 0.056010857224464417, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_k.weight": { + "min": -0.4354335069656372, + "max": 0.3252001106739044, + "mean": 2.4517319616279565e-05, + "std": 0.03413575515151024, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_k.bias": { + "min": -5.544912338256836, + "max": 7.312640190124512, + "mean": -0.007366480305790901, + "std": 0.6992346048355103, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_v.weight": { + "min": -0.343842089176178, + "max": 0.36349090933799744, + "mean": 0.0001033815206028521, + "std": 0.04782803729176521, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_v.bias": { + "min": -0.07375385612249374, + "max": 0.06036338210105896, + "mean": 0.0009326335857622325, + "std": 0.014949528500437737, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.weight": { + "min": -0.25554072856903076, + "max": 0.28654900193214417, + "mean": 4.4343978515826166e-06, + "std": 0.041555255651474, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.bias": { + "min": -0.05532766133546829, + "max": 0.06282689422369003, + "mean": 0.00014148413902148604, + "std": 0.007174154743552208, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.3.g": { + "min": 0.49368223547935486, + "max": 1.2208430767059326, + "mean": 1.0134273767471313, + "std": 0.11743992567062378, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.weight": { + "min": -1.0936156511306763, + "max": 1.0469433069229126, + "mean": -4.977267235517502e-05, + "std": 0.05241084843873978, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.bias": { + "min": -0.22367312014102936, + "max": 0.17280347645282745, + "mean": -0.02724579907953739, + "std": 0.03635029122233391, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.24.4.ff.2.weight": { + "min": -0.8845533132553101, + "max": 0.9224876165390015, + "mean": -0.000146063175634481, + "std": 0.053282301872968674, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.24.4.ff.2.bias": { + "min": -0.17102308571338654, + "max": 0.37991419434547424, + "mean": 0.003368670353665948, + "std": 0.03989797830581665, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.0.weight": { + "min": -0.7772527933120728, + "max": 0.7234945297241211, + "mean": 1.913893902383279e-05, + "std": 0.04616517201066017, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.25.1.g": { + "min": 0.3385581970214844, + "max": 1.4277539253234863, + "mean": 0.9483213424682617, + "std": 0.20673882961273193, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_q.weight": { + "min": -1.7455896139144897, + "max": 1.7045435905456543, + "mean": 0.00022695818915963173, + "std": 0.15868604183197021, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_q.bias": { + "min": -1.199622631072998, + "max": 1.099592685699463, + "mean": -0.00953536294400692, + "std": 0.203833669424057, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_k.weight": { + "min": -0.4213031232357025, + "max": 0.42637819051742554, + "mean": 6.450257205870003e-05, + "std": 0.048018429428339005, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_k.bias": { + "min": -19.743934631347656, + "max": 19.539039611816406, + "mean": -0.24830012023448944, + "std": 4.776192665100098, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_v.weight": { + "min": -0.32387086749076843, + "max": 0.4384032189846039, + "mean": -1.2015252650598995e-05, + "std": 0.046161383390426636, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_v.bias": { + "min": -0.0340605154633522, + "max": 0.037125036120414734, + "mean": 0.0006421188591048121, + "std": 0.012921434827148914, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.weight": { + "min": -0.703487753868103, + "max": 0.6645694375038147, + "mean": 4.3493168050190434e-05, + "std": 0.0578836165368557, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.bias": { + "min": -0.0722307413816452, + "max": 0.06750312447547913, + "mean": -0.00013278273399919271, + "std": 0.012919807806611061, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.3.g": { + "min": 0.3801887333393097, + "max": 1.3909631967544556, + "mean": 1.0665581226348877, + "std": 0.2197146713733673, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.weight": { + "min": -0.6164069175720215, + "max": 0.7170259952545166, + "mean": 0.00011130621714983135, + "std": 0.058021292090415955, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.bias": { + "min": -0.21958374977111816, + "max": 0.2251792550086975, + "mean": 0.0062429094687104225, + "std": 0.04972800984978676, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.25.4.ff.2.weight": { + "min": -0.6296579241752625, + "max": 0.8892135620117188, + "mean": 1.1699157766997814e-05, + "std": 0.023528022691607475, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.25.4.ff.2.bias": { + "min": -0.5068321824073792, + "max": 0.4739873707294464, + "mean": -0.003016006201505661, + "std": 0.06930257380008698, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.norm_out.g": { + "min": 0.5377203226089478, + "max": 1.1807109117507935, + "mean": 0.7827430367469788, + "std": 0.09885811805725098, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.proj_out.weight": { + "min": -0.2669532299041748, + "max": 0.2126723825931549, + "mean": -0.00022305321181192994, + "std": 0.05399656668305397, + "sparsity": 0.0, + "shape": [ + 100, + 1024 + ] + }, + "transformer.proj_out.bias": { + "min": -0.23791296780109406, + "max": 0.014832733199000359, + "mean": -0.04395970329642296, + "std": 0.03433232381939888, + "sparsity": 0.0, + "shape": [ + 100 + ] + } + } +} \ No newline at end of file