diff --git "a/model_analysis.json" "b/model_analysis.json" new file mode 100644--- /dev/null +++ "b/model_analysis.json" @@ -0,0 +1,4683 @@ +{ + "layer_types": { + "transformer": 391 + }, + "parameter_counts": { + "transformer.time_embed.time_mlp.0.weight": 262144, + "transformer.time_embed.time_mlp.0.bias": 1024, + "transformer.time_embed.time_mlp.2.weight": 1048576, + "transformer.time_embed.time_mlp.2.bias": 1024, + "transformer.text_embed.text_embed.weight": 254600, + "transformer.input_embed.proj.weight": 307200, + "transformer.input_embed.proj.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, + "transformer.layers.0.1.g": 1024, + "transformer.layers.0.2.to_q.weight": 1048576, + "transformer.layers.0.2.to_q.bias": 1024, + "transformer.layers.0.2.to_k.weight": 1048576, + "transformer.layers.0.2.to_k.bias": 1024, + "transformer.layers.0.2.to_v.weight": 1048576, + "transformer.layers.0.2.to_v.bias": 1024, + "transformer.layers.0.2.to_out.0.weight": 1048576, + "transformer.layers.0.2.to_out.0.bias": 1024, + "transformer.layers.0.3.g": 1024, + "transformer.layers.0.4.ff.0.0.weight": 4194304, + "transformer.layers.0.4.ff.0.0.bias": 4096, + "transformer.layers.0.4.ff.2.weight": 4194304, + "transformer.layers.0.4.ff.2.bias": 1024, + "transformer.layers.1.1.g": 1024, + "transformer.layers.1.2.to_q.weight": 1048576, + "transformer.layers.1.2.to_q.bias": 1024, + "transformer.layers.1.2.to_k.weight": 1048576, + "transformer.layers.1.2.to_k.bias": 1024, + "transformer.layers.1.2.to_v.weight": 1048576, + "transformer.layers.1.2.to_v.bias": 1024, + "transformer.layers.1.2.to_out.0.weight": 1048576, + "transformer.layers.1.2.to_out.0.bias": 1024, + "transformer.layers.1.3.g": 1024, + "transformer.layers.1.4.ff.0.0.weight": 4194304, + "transformer.layers.1.4.ff.0.0.bias": 4096, + "transformer.layers.1.4.ff.2.weight": 4194304, + "transformer.layers.1.4.ff.2.bias": 1024, + "transformer.layers.2.1.g": 1024, + "transformer.layers.2.2.to_q.weight": 1048576, + "transformer.layers.2.2.to_q.bias": 1024, + "transformer.layers.2.2.to_k.weight": 1048576, + "transformer.layers.2.2.to_k.bias": 1024, + "transformer.layers.2.2.to_v.weight": 1048576, + "transformer.layers.2.2.to_v.bias": 1024, + "transformer.layers.2.2.to_out.0.weight": 1048576, + "transformer.layers.2.2.to_out.0.bias": 1024, + "transformer.layers.2.3.g": 1024, + "transformer.layers.2.4.ff.0.0.weight": 4194304, + "transformer.layers.2.4.ff.0.0.bias": 4096, + "transformer.layers.2.4.ff.2.weight": 4194304, + "transformer.layers.2.4.ff.2.bias": 1024, + "transformer.layers.3.1.g": 1024, + "transformer.layers.3.2.to_q.weight": 1048576, + "transformer.layers.3.2.to_q.bias": 1024, + "transformer.layers.3.2.to_k.weight": 1048576, + "transformer.layers.3.2.to_k.bias": 1024, + "transformer.layers.3.2.to_v.weight": 1048576, + "transformer.layers.3.2.to_v.bias": 1024, + "transformer.layers.3.2.to_out.0.weight": 1048576, + "transformer.layers.3.2.to_out.0.bias": 1024, + "transformer.layers.3.3.g": 1024, + "transformer.layers.3.4.ff.0.0.weight": 4194304, + "transformer.layers.3.4.ff.0.0.bias": 4096, + "transformer.layers.3.4.ff.2.weight": 4194304, + "transformer.layers.3.4.ff.2.bias": 1024, + "transformer.layers.4.1.g": 1024, + "transformer.layers.4.2.to_q.weight": 1048576, + "transformer.layers.4.2.to_q.bias": 1024, + "transformer.layers.4.2.to_k.weight": 1048576, + "transformer.layers.4.2.to_k.bias": 1024, + "transformer.layers.4.2.to_v.weight": 1048576, + "transformer.layers.4.2.to_v.bias": 1024, + "transformer.layers.4.2.to_out.0.weight": 1048576, + "transformer.layers.4.2.to_out.0.bias": 1024, + "transformer.layers.4.3.g": 1024, + "transformer.layers.4.4.ff.0.0.weight": 4194304, + "transformer.layers.4.4.ff.0.0.bias": 4096, + "transformer.layers.4.4.ff.2.weight": 4194304, + "transformer.layers.4.4.ff.2.bias": 1024, + "transformer.layers.5.1.g": 1024, + "transformer.layers.5.2.to_q.weight": 1048576, + "transformer.layers.5.2.to_q.bias": 1024, + "transformer.layers.5.2.to_k.weight": 1048576, + "transformer.layers.5.2.to_k.bias": 1024, + "transformer.layers.5.2.to_v.weight": 1048576, + "transformer.layers.5.2.to_v.bias": 1024, + "transformer.layers.5.2.to_out.0.weight": 1048576, + "transformer.layers.5.2.to_out.0.bias": 1024, + "transformer.layers.5.3.g": 1024, + "transformer.layers.5.4.ff.0.0.weight": 4194304, + "transformer.layers.5.4.ff.0.0.bias": 4096, + "transformer.layers.5.4.ff.2.weight": 4194304, + "transformer.layers.5.4.ff.2.bias": 1024, + "transformer.layers.6.1.g": 1024, + "transformer.layers.6.2.to_q.weight": 1048576, + "transformer.layers.6.2.to_q.bias": 1024, + "transformer.layers.6.2.to_k.weight": 1048576, + "transformer.layers.6.2.to_k.bias": 1024, + "transformer.layers.6.2.to_v.weight": 1048576, + "transformer.layers.6.2.to_v.bias": 1024, + "transformer.layers.6.2.to_out.0.weight": 1048576, + "transformer.layers.6.2.to_out.0.bias": 1024, + "transformer.layers.6.3.g": 1024, + "transformer.layers.6.4.ff.0.0.weight": 4194304, + "transformer.layers.6.4.ff.0.0.bias": 4096, + "transformer.layers.6.4.ff.2.weight": 4194304, + "transformer.layers.6.4.ff.2.bias": 1024, + "transformer.layers.7.1.g": 1024, + "transformer.layers.7.2.to_q.weight": 1048576, + "transformer.layers.7.2.to_q.bias": 1024, + "transformer.layers.7.2.to_k.weight": 1048576, + "transformer.layers.7.2.to_k.bias": 1024, + "transformer.layers.7.2.to_v.weight": 1048576, + "transformer.layers.7.2.to_v.bias": 1024, + "transformer.layers.7.2.to_out.0.weight": 1048576, + "transformer.layers.7.2.to_out.0.bias": 1024, + "transformer.layers.7.3.g": 1024, + "transformer.layers.7.4.ff.0.0.weight": 4194304, + "transformer.layers.7.4.ff.0.0.bias": 4096, + "transformer.layers.7.4.ff.2.weight": 4194304, + "transformer.layers.7.4.ff.2.bias": 1024, + "transformer.layers.8.1.g": 1024, + "transformer.layers.8.2.to_q.weight": 1048576, + "transformer.layers.8.2.to_q.bias": 1024, + "transformer.layers.8.2.to_k.weight": 1048576, + "transformer.layers.8.2.to_k.bias": 1024, + "transformer.layers.8.2.to_v.weight": 1048576, + "transformer.layers.8.2.to_v.bias": 1024, + "transformer.layers.8.2.to_out.0.weight": 1048576, + "transformer.layers.8.2.to_out.0.bias": 1024, + "transformer.layers.8.3.g": 1024, + "transformer.layers.8.4.ff.0.0.weight": 4194304, + "transformer.layers.8.4.ff.0.0.bias": 4096, + "transformer.layers.8.4.ff.2.weight": 4194304, + "transformer.layers.8.4.ff.2.bias": 1024, + "transformer.layers.9.1.g": 1024, + "transformer.layers.9.2.to_q.weight": 1048576, + "transformer.layers.9.2.to_q.bias": 1024, + "transformer.layers.9.2.to_k.weight": 1048576, + "transformer.layers.9.2.to_k.bias": 1024, + "transformer.layers.9.2.to_v.weight": 1048576, + "transformer.layers.9.2.to_v.bias": 1024, + "transformer.layers.9.2.to_out.0.weight": 1048576, + "transformer.layers.9.2.to_out.0.bias": 1024, + "transformer.layers.9.3.g": 1024, + "transformer.layers.9.4.ff.0.0.weight": 4194304, + "transformer.layers.9.4.ff.0.0.bias": 4096, + "transformer.layers.9.4.ff.2.weight": 4194304, + "transformer.layers.9.4.ff.2.bias": 1024, + "transformer.layers.10.1.g": 1024, + "transformer.layers.10.2.to_q.weight": 1048576, + "transformer.layers.10.2.to_q.bias": 1024, + "transformer.layers.10.2.to_k.weight": 1048576, + "transformer.layers.10.2.to_k.bias": 1024, + "transformer.layers.10.2.to_v.weight": 1048576, + "transformer.layers.10.2.to_v.bias": 1024, + "transformer.layers.10.2.to_out.0.weight": 1048576, + "transformer.layers.10.2.to_out.0.bias": 1024, + "transformer.layers.10.3.g": 1024, + "transformer.layers.10.4.ff.0.0.weight": 4194304, + "transformer.layers.10.4.ff.0.0.bias": 4096, + "transformer.layers.10.4.ff.2.weight": 4194304, + "transformer.layers.10.4.ff.2.bias": 1024, + "transformer.layers.11.1.g": 1024, + "transformer.layers.11.2.to_q.weight": 1048576, + "transformer.layers.11.2.to_q.bias": 1024, + "transformer.layers.11.2.to_k.weight": 1048576, + "transformer.layers.11.2.to_k.bias": 1024, + "transformer.layers.11.2.to_v.weight": 1048576, + "transformer.layers.11.2.to_v.bias": 1024, + "transformer.layers.11.2.to_out.0.weight": 1048576, + "transformer.layers.11.2.to_out.0.bias": 1024, + "transformer.layers.11.3.g": 1024, + "transformer.layers.11.4.ff.0.0.weight": 4194304, + "transformer.layers.11.4.ff.0.0.bias": 4096, + "transformer.layers.11.4.ff.2.weight": 4194304, + "transformer.layers.11.4.ff.2.bias": 1024, + "transformer.layers.12.1.g": 1024, + "transformer.layers.12.2.to_q.weight": 1048576, + "transformer.layers.12.2.to_q.bias": 1024, + "transformer.layers.12.2.to_k.weight": 1048576, + "transformer.layers.12.2.to_k.bias": 1024, + "transformer.layers.12.2.to_v.weight": 1048576, + "transformer.layers.12.2.to_v.bias": 1024, + "transformer.layers.12.2.to_out.0.weight": 1048576, + "transformer.layers.12.2.to_out.0.bias": 1024, + "transformer.layers.12.3.g": 1024, + "transformer.layers.12.4.ff.0.0.weight": 4194304, + "transformer.layers.12.4.ff.0.0.bias": 4096, + "transformer.layers.12.4.ff.2.weight": 4194304, + "transformer.layers.12.4.ff.2.bias": 1024, + "transformer.layers.13.0.weight": 2097152, + "transformer.layers.13.1.g": 1024, + "transformer.layers.13.2.to_q.weight": 1048576, + "transformer.layers.13.2.to_q.bias": 1024, + "transformer.layers.13.2.to_k.weight": 1048576, + "transformer.layers.13.2.to_k.bias": 1024, + "transformer.layers.13.2.to_v.weight": 1048576, + "transformer.layers.13.2.to_v.bias": 1024, + "transformer.layers.13.2.to_out.0.weight": 1048576, + "transformer.layers.13.2.to_out.0.bias": 1024, + "transformer.layers.13.3.g": 1024, + "transformer.layers.13.4.ff.0.0.weight": 4194304, + "transformer.layers.13.4.ff.0.0.bias": 4096, + "transformer.layers.13.4.ff.2.weight": 4194304, + "transformer.layers.13.4.ff.2.bias": 1024, + "transformer.layers.14.0.weight": 2097152, + "transformer.layers.14.1.g": 1024, + "transformer.layers.14.2.to_q.weight": 1048576, + "transformer.layers.14.2.to_q.bias": 1024, + "transformer.layers.14.2.to_k.weight": 1048576, + "transformer.layers.14.2.to_k.bias": 1024, + "transformer.layers.14.2.to_v.weight": 1048576, + "transformer.layers.14.2.to_v.bias": 1024, + "transformer.layers.14.2.to_out.0.weight": 1048576, + "transformer.layers.14.2.to_out.0.bias": 1024, + "transformer.layers.14.3.g": 1024, + "transformer.layers.14.4.ff.0.0.weight": 4194304, + "transformer.layers.14.4.ff.0.0.bias": 4096, + "transformer.layers.14.4.ff.2.weight": 4194304, + "transformer.layers.14.4.ff.2.bias": 1024, + "transformer.layers.15.0.weight": 2097152, + "transformer.layers.15.1.g": 1024, + "transformer.layers.15.2.to_q.weight": 1048576, + "transformer.layers.15.2.to_q.bias": 1024, + "transformer.layers.15.2.to_k.weight": 1048576, + "transformer.layers.15.2.to_k.bias": 1024, + "transformer.layers.15.2.to_v.weight": 1048576, + "transformer.layers.15.2.to_v.bias": 1024, + "transformer.layers.15.2.to_out.0.weight": 1048576, + "transformer.layers.15.2.to_out.0.bias": 1024, + "transformer.layers.15.3.g": 1024, + "transformer.layers.15.4.ff.0.0.weight": 4194304, + "transformer.layers.15.4.ff.0.0.bias": 4096, + "transformer.layers.15.4.ff.2.weight": 4194304, + "transformer.layers.15.4.ff.2.bias": 1024, + "transformer.layers.16.0.weight": 2097152, + "transformer.layers.16.1.g": 1024, + "transformer.layers.16.2.to_q.weight": 1048576, + "transformer.layers.16.2.to_q.bias": 1024, + "transformer.layers.16.2.to_k.weight": 1048576, + "transformer.layers.16.2.to_k.bias": 1024, + "transformer.layers.16.2.to_v.weight": 1048576, + "transformer.layers.16.2.to_v.bias": 1024, + "transformer.layers.16.2.to_out.0.weight": 1048576, + "transformer.layers.16.2.to_out.0.bias": 1024, + "transformer.layers.16.3.g": 1024, + "transformer.layers.16.4.ff.0.0.weight": 4194304, + "transformer.layers.16.4.ff.0.0.bias": 4096, + "transformer.layers.16.4.ff.2.weight": 4194304, + "transformer.layers.16.4.ff.2.bias": 1024, + "transformer.layers.17.0.weight": 2097152, + "transformer.layers.17.1.g": 1024, + "transformer.layers.17.2.to_q.weight": 1048576, + "transformer.layers.17.2.to_q.bias": 1024, + "transformer.layers.17.2.to_k.weight": 1048576, + "transformer.layers.17.2.to_k.bias": 1024, + "transformer.layers.17.2.to_v.weight": 1048576, + "transformer.layers.17.2.to_v.bias": 1024, + "transformer.layers.17.2.to_out.0.weight": 1048576, + "transformer.layers.17.2.to_out.0.bias": 1024, + "transformer.layers.17.3.g": 1024, + "transformer.layers.17.4.ff.0.0.weight": 4194304, + "transformer.layers.17.4.ff.0.0.bias": 4096, + "transformer.layers.17.4.ff.2.weight": 4194304, + "transformer.layers.17.4.ff.2.bias": 1024, + "transformer.layers.18.0.weight": 2097152, + "transformer.layers.18.1.g": 1024, + "transformer.layers.18.2.to_q.weight": 1048576, + "transformer.layers.18.2.to_q.bias": 1024, + "transformer.layers.18.2.to_k.weight": 1048576, + "transformer.layers.18.2.to_k.bias": 1024, + "transformer.layers.18.2.to_v.weight": 1048576, + "transformer.layers.18.2.to_v.bias": 1024, + "transformer.layers.18.2.to_out.0.weight": 1048576, + "transformer.layers.18.2.to_out.0.bias": 1024, + "transformer.layers.18.3.g": 1024, + "transformer.layers.18.4.ff.0.0.weight": 4194304, + "transformer.layers.18.4.ff.0.0.bias": 4096, + "transformer.layers.18.4.ff.2.weight": 4194304, + "transformer.layers.18.4.ff.2.bias": 1024, + "transformer.layers.19.0.weight": 2097152, + "transformer.layers.19.1.g": 1024, + "transformer.layers.19.2.to_q.weight": 1048576, + "transformer.layers.19.2.to_q.bias": 1024, + "transformer.layers.19.2.to_k.weight": 1048576, + "transformer.layers.19.2.to_k.bias": 1024, + "transformer.layers.19.2.to_v.weight": 1048576, + "transformer.layers.19.2.to_v.bias": 1024, + "transformer.layers.19.2.to_out.0.weight": 1048576, + "transformer.layers.19.2.to_out.0.bias": 1024, + "transformer.layers.19.3.g": 1024, + "transformer.layers.19.4.ff.0.0.weight": 4194304, + "transformer.layers.19.4.ff.0.0.bias": 4096, + "transformer.layers.19.4.ff.2.weight": 4194304, + "transformer.layers.19.4.ff.2.bias": 1024, + "transformer.layers.20.0.weight": 2097152, + "transformer.layers.20.1.g": 1024, + "transformer.layers.20.2.to_q.weight": 1048576, + "transformer.layers.20.2.to_q.bias": 1024, + "transformer.layers.20.2.to_k.weight": 1048576, + "transformer.layers.20.2.to_k.bias": 1024, + "transformer.layers.20.2.to_v.weight": 1048576, + "transformer.layers.20.2.to_v.bias": 1024, + "transformer.layers.20.2.to_out.0.weight": 1048576, + "transformer.layers.20.2.to_out.0.bias": 1024, + "transformer.layers.20.3.g": 1024, + "transformer.layers.20.4.ff.0.0.weight": 4194304, + "transformer.layers.20.4.ff.0.0.bias": 4096, + "transformer.layers.20.4.ff.2.weight": 4194304, + "transformer.layers.20.4.ff.2.bias": 1024, + "transformer.layers.21.0.weight": 2097152, + "transformer.layers.21.1.g": 1024, + "transformer.layers.21.2.to_q.weight": 1048576, + "transformer.layers.21.2.to_q.bias": 1024, + "transformer.layers.21.2.to_k.weight": 1048576, + "transformer.layers.21.2.to_k.bias": 1024, + "transformer.layers.21.2.to_v.weight": 1048576, + "transformer.layers.21.2.to_v.bias": 1024, + "transformer.layers.21.2.to_out.0.weight": 1048576, + "transformer.layers.21.2.to_out.0.bias": 1024, + "transformer.layers.21.3.g": 1024, + "transformer.layers.21.4.ff.0.0.weight": 4194304, + "transformer.layers.21.4.ff.0.0.bias": 4096, + "transformer.layers.21.4.ff.2.weight": 4194304, + "transformer.layers.21.4.ff.2.bias": 1024, + "transformer.layers.22.0.weight": 2097152, + "transformer.layers.22.1.g": 1024, + "transformer.layers.22.2.to_q.weight": 1048576, + "transformer.layers.22.2.to_q.bias": 1024, + "transformer.layers.22.2.to_k.weight": 1048576, + "transformer.layers.22.2.to_k.bias": 1024, + "transformer.layers.22.2.to_v.weight": 1048576, + "transformer.layers.22.2.to_v.bias": 1024, + "transformer.layers.22.2.to_out.0.weight": 1048576, + "transformer.layers.22.2.to_out.0.bias": 1024, + "transformer.layers.22.3.g": 1024, + "transformer.layers.22.4.ff.0.0.weight": 4194304, + "transformer.layers.22.4.ff.0.0.bias": 4096, + "transformer.layers.22.4.ff.2.weight": 4194304, + "transformer.layers.22.4.ff.2.bias": 1024, + "transformer.layers.23.0.weight": 2097152, + "transformer.layers.23.1.g": 1024, + "transformer.layers.23.2.to_q.weight": 1048576, + "transformer.layers.23.2.to_q.bias": 1024, + "transformer.layers.23.2.to_k.weight": 1048576, + "transformer.layers.23.2.to_k.bias": 1024, + "transformer.layers.23.2.to_v.weight": 1048576, + "transformer.layers.23.2.to_v.bias": 1024, + "transformer.layers.23.2.to_out.0.weight": 1048576, + "transformer.layers.23.2.to_out.0.bias": 1024, + "transformer.layers.23.3.g": 1024, + "transformer.layers.23.4.ff.0.0.weight": 4194304, + "transformer.layers.23.4.ff.0.0.bias": 4096, + "transformer.layers.23.4.ff.2.weight": 4194304, + "transformer.layers.23.4.ff.2.bias": 1024, + "transformer.layers.24.0.weight": 2097152, + "transformer.layers.24.1.g": 1024, + "transformer.layers.24.2.to_q.weight": 1048576, + "transformer.layers.24.2.to_q.bias": 1024, + "transformer.layers.24.2.to_k.weight": 1048576, + "transformer.layers.24.2.to_k.bias": 1024, + "transformer.layers.24.2.to_v.weight": 1048576, + "transformer.layers.24.2.to_v.bias": 1024, + "transformer.layers.24.2.to_out.0.weight": 1048576, + "transformer.layers.24.2.to_out.0.bias": 1024, + "transformer.layers.24.3.g": 1024, + "transformer.layers.24.4.ff.0.0.weight": 4194304, + "transformer.layers.24.4.ff.0.0.bias": 4096, + "transformer.layers.24.4.ff.2.weight": 4194304, + "transformer.layers.24.4.ff.2.bias": 1024, + "transformer.layers.25.0.weight": 2097152, + "transformer.layers.25.1.g": 1024, + "transformer.layers.25.2.to_q.weight": 1048576, + "transformer.layers.25.2.to_q.bias": 1024, + "transformer.layers.25.2.to_k.weight": 1048576, + "transformer.layers.25.2.to_k.bias": 1024, + "transformer.layers.25.2.to_v.weight": 1048576, + "transformer.layers.25.2.to_v.bias": 1024, + "transformer.layers.25.2.to_out.0.weight": 1048576, + "transformer.layers.25.2.to_out.0.bias": 1024, + "transformer.layers.25.3.g": 1024, + "transformer.layers.25.4.ff.0.0.weight": 4194304, + "transformer.layers.25.4.ff.0.0.bias": 4096, + "transformer.layers.25.4.ff.2.weight": 4194304, + "transformer.layers.25.4.ff.2.bias": 1024, + "transformer.norm_out.g": 1024, + "transformer.proj_out.weight": 102400, + "transformer.proj_out.bias": 100 + }, + "important_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ], + "bottleneck_layers": [], + "recommendations": { + "focus_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ] + }, + "total_parameters": 391, + "total_elements": 360755948, + "param_ranges": { + "transformer.time_embed.time_mlp.0.weight": { + "min": -0.43091416358947754, + "max": 0.2991102933883667, + "mean": -0.002557656727731228, + "std": 0.04255230724811554, + "sparsity": 0.0, + "shape": [ + 1024, + 256 + ] + }, + "transformer.time_embed.time_mlp.0.bias": { + "min": -0.06317874044179916, + "max": 0.10845368355512619, + "mean": 0.0006046494818292558, + "std": 0.0341438427567482, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.weight": { + "min": -0.4125913977622986, + "max": 0.8363389372825623, + "mean": -0.0002094925002893433, + "std": 0.024107541888952255, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.bias": { + "min": -0.11634448170661926, + "max": 0.32392504811286926, + "mean": -0.0009387563331983984, + "std": 0.019654380157589912, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.text_embed.text_embed.weight": { + "min": -2.8076894283294678, + "max": 2.8856873512268066, + "mean": -0.0003593244473449886, + "std": 0.6153794527053833, + "sparsity": 0.0, + "shape": [ + 2546, + 100 + ] + }, + "transformer.input_embed.proj.weight": { + "min": -0.2804395258426666, + "max": 0.38235825300216675, + "mean": 0.00042111962102353573, + "std": 0.0427500456571579, + "sparsity": 0.0, + "shape": [ + 1024, + 300 + ] + }, + "transformer.input_embed.proj.bias": { + "min": -0.22397927939891815, + "max": 0.21124881505966187, + "mean": -0.004504885524511337, + "std": 0.04102449491620064, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { + "min": -0.42797791957855225, + "max": 0.4753724932670593, + "mean": 3.1681217933510197e-06, + "std": 0.024508841335773468, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { + "min": -0.3278864026069641, + "max": 0.15815186500549316, + "mean": -0.046754755079746246, + "std": 0.05172203853726387, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { + "min": -0.4108750522136688, + "max": 0.3548462688922882, + "mean": -0.0001276329276151955, + "std": 0.023600950837135315, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { + "min": -0.231490820646286, + "max": 0.26459917426109314, + "mean": -0.029202936217188835, + "std": 0.049504559487104416, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.1.g": { + "min": 0.2546604871749878, + "max": 0.8254969120025635, + "mean": 0.5257646441459656, + "std": 0.08148879557847977, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_q.weight": { + "min": -0.2975306808948517, + "max": 0.26634442806243896, + "mean": -0.0004239020636305213, + "std": 0.032103944569826126, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_q.bias": { + "min": -0.093165822327137, + "max": 0.12537634372711182, + "mean": 0.0006500760791823268, + "std": 0.0257789958268404, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_k.weight": { + "min": -0.2912229299545288, + "max": 0.2824551463127136, + "mean": -7.682169962208718e-05, + "std": 0.03093571960926056, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_k.bias": { + "min": -5.9252495765686035, + "max": 5.839654445648193, + "mean": -0.00940663367509842, + "std": 1.2986583709716797, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_v.weight": { + "min": -0.4255436658859253, + "max": 0.34462970495224, + "mean": 9.765196591615677e-05, + "std": 0.02995290234684944, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_v.bias": { + "min": -0.028961628675460815, + "max": 0.027653951197862625, + "mean": -0.000311878917273134, + "std": 0.012572262436151505, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.weight": { + "min": -0.4547809660434723, + "max": 0.44922640919685364, + "mean": 2.2741787688573822e-05, + "std": 0.023854725062847137, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.bias": { + "min": -0.08907536417245865, + "max": 0.09154797345399857, + "mean": 0.0022746319882571697, + "std": 0.019537169486284256, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.3.g": { + "min": 0.2665960192680359, + "max": 1.0631530284881592, + "mean": 0.5315366387367249, + "std": 0.10529287159442902, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.weight": { + "min": -0.5752094984054565, + "max": 0.6091693043708801, + "mean": -0.0004337065329309553, + "std": 0.038595084100961685, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.bias": { + "min": -0.18266847729682922, + "max": 0.04574590548872948, + "mean": -0.02949558012187481, + "std": 0.042705073952674866, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.0.4.ff.2.weight": { + "min": -1.168283462524414, + "max": 1.6358791589736938, + "mean": 0.0003184601664543152, + "std": 0.027693841606378555, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.0.4.ff.2.bias": { + "min": -0.1632407307624817, + "max": 0.20662632584571838, + "mean": -0.02112644352018833, + "std": 0.027983704581856728, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.1.g": { + "min": 0.2244432270526886, + "max": 0.8492330312728882, + "mean": 0.4877929091453552, + "std": 0.07575991004705429, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_q.weight": { + "min": -0.25644662976264954, + "max": 0.30648505687713623, + "mean": -9.105999197345227e-06, + "std": 0.03347046673297882, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_q.bias": { + "min": -0.09590143710374832, + "max": 0.11091545224189758, + "mean": 5.9943689848296344e-05, + "std": 0.02701094001531601, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_k.weight": { + "min": -0.29843541979789734, + "max": 0.29746681451797485, + "mean": 5.037898154114373e-05, + "std": 0.0325385183095932, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_k.bias": { + "min": -5.186855792999268, + "max": 5.106731414794922, + "mean": -0.014725911431014538, + "std": 1.1609561443328857, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_v.weight": { + "min": -0.34537965059280396, + "max": 0.3438728153705597, + "mean": 7.886411185609177e-05, + "std": 0.030058259144425392, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_v.bias": { + "min": -0.036315590143203735, + "max": 0.033395010977983475, + "mean": -0.00014420351362787187, + "std": 0.013025550171732903, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.weight": { + "min": -0.3161202371120453, + "max": 0.37616145610809326, + "mean": -2.1655154341715388e-05, + "std": 0.02405548468232155, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.bias": { + "min": -0.10574664920568466, + "max": 0.12242550402879715, + "mean": -0.0019548372365534306, + "std": 0.028876660391688347, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.3.g": { + "min": 0.31179988384246826, + "max": 1.1284958124160767, + "mean": 0.6666731238365173, + "std": 0.09859278053045273, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.weight": { + "min": -0.8728909492492676, + "max": 0.6278397440910339, + "mean": 0.0016749973874539137, + "std": 0.047438185662031174, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.bias": { + "min": -0.2722160518169403, + "max": 0.0340891033411026, + "mean": -0.046644046902656555, + "std": 0.04069075360894203, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.1.4.ff.2.weight": { + "min": -0.922055184841156, + "max": 0.9654105305671692, + "mean": 0.0010205680737271905, + "std": 0.04070195555686951, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.1.4.ff.2.bias": { + "min": -0.14518415927886963, + "max": 0.07515987008810043, + "mean": -0.009094657376408577, + "std": 0.025729060173034668, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.1.g": { + "min": 0.2397412657737732, + "max": 0.7171911001205444, + "mean": 0.447447270154953, + "std": 0.05987730622291565, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_q.weight": { + "min": -0.2741525173187256, + "max": 0.29877936840057373, + "mean": 8.61497210280504e-06, + "std": 0.03547372668981552, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_q.bias": { + "min": -0.11957156658172607, + "max": 0.11899449676275253, + "mean": 0.0007509939605370164, + "std": 0.0276488047093153, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_k.weight": { + "min": -0.2823837697505951, + "max": 0.28084659576416016, + "mean": -7.657262904103845e-05, + "std": 0.035102009773254395, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_k.bias": { + "min": -2.5205748081207275, + "max": 2.532623291015625, + "mean": 0.02687813714146614, + "std": 0.5879213809967041, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_v.weight": { + "min": -0.2220122367143631, + "max": 0.27260157465934753, + "mean": 2.5499884941382334e-06, + "std": 0.030731454491615295, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_v.bias": { + "min": -0.03331878036260605, + "max": 0.031287048012018204, + "mean": 0.00011721440387191251, + "std": 0.01239620428532362, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.weight": { + "min": -0.2359972894191742, + "max": 0.23261798918247223, + "mean": 5.7136268878821284e-05, + "std": 0.025697365403175354, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.bias": { + "min": -0.13661594688892365, + "max": 0.12854568660259247, + "mean": -0.005501019302755594, + "std": 0.03999658301472664, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.3.g": { + "min": 0.3546392619609833, + "max": 1.180222511291504, + "mean": 0.7107274532318115, + "std": 0.10418680310249329, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.weight": { + "min": -0.6183957457542419, + "max": 0.5562719106674194, + "mean": 0.001160319778136909, + "std": 0.04611416533589363, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.bias": { + "min": -0.19019058346748352, + "max": 0.024931631982326508, + "mean": -0.034878939390182495, + "std": 0.028703488409519196, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.2.4.ff.2.weight": { + "min": -1.1339737176895142, + "max": 0.9729978442192078, + "mean": 0.00035909697180613875, + "std": 0.04234269633889198, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.2.4.ff.2.bias": { + "min": -0.6004759073257446, + "max": 0.06302264332771301, + "mean": -0.004885237663984299, + "std": 0.028683220967650414, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.1.g": { + "min": 0.37538695335388184, + "max": 0.9469302892684937, + "mean": 0.5929263234138489, + "std": 0.0680219903588295, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_q.weight": { + "min": -0.3926527798175812, + "max": 0.37037163972854614, + "mean": 7.004380313446745e-05, + "std": 0.03718654066324234, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_q.bias": { + "min": -0.11952866613864899, + "max": 0.1371433585882187, + "mean": 0.0009209888521581888, + "std": 0.029237791895866394, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_k.weight": { + "min": -0.6214983463287354, + "max": 0.5109242796897888, + "mean": 1.5226184586936142e-05, + "std": 0.036439333111047745, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_k.bias": { + "min": -8.222587585449219, + "max": 8.827320098876953, + "mean": -0.10952811688184738, + "std": 1.7043956518173218, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_v.weight": { + "min": -0.2775035798549652, + "max": 0.24042560160160065, + "mean": 5.222904292168096e-05, + "std": 0.03261308744549751, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_v.bias": { + "min": -0.05175856128334999, + "max": 0.03964223712682724, + "mean": 9.375870286021382e-05, + "std": 0.012972756288945675, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.weight": { + "min": -0.23131398856639862, + "max": 0.2357378751039505, + "mean": -2.203516305598896e-05, + "std": 0.02938969060778618, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.bias": { + "min": -0.2051505148410797, + "max": 0.10573741793632507, + "mean": -0.0040251207537949085, + "std": 0.032664697617292404, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.3.g": { + "min": 0.3397069573402405, + "max": 1.01918625831604, + "mean": 0.7008247375488281, + "std": 0.0969780907034874, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.weight": { + "min": -0.5670483708381653, + "max": 0.8365305662155151, + "mean": 0.00041504879482090473, + "std": 0.042294830083847046, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.bias": { + "min": -0.2130415141582489, + "max": 0.029987983405590057, + "mean": -0.03220636397600174, + "std": 0.02657567895948887, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.3.4.ff.2.weight": { + "min": -0.7582250833511353, + "max": 0.7219672799110413, + "mean": -1.576655267854221e-05, + "std": 0.03683546185493469, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.3.4.ff.2.bias": { + "min": -0.26458415389060974, + "max": 0.10674209892749786, + "mean": -0.003017352893948555, + "std": 0.02890385128557682, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.1.g": { + "min": 0.28402721881866455, + "max": 0.6998150944709778, + "mean": 0.49963071942329407, + "std": 0.04700654000043869, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_q.weight": { + "min": -0.27952155470848083, + "max": 0.23467987775802612, + "mean": -0.00011085892765549943, + "std": 0.038757603615522385, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_q.bias": { + "min": -0.15429016947746277, + "max": 0.12700684368610382, + "mean": -0.002232399070635438, + "std": 0.033386100083589554, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_k.weight": { + "min": -0.41612547636032104, + "max": 0.6611561179161072, + "mean": -1.8461763829691336e-05, + "std": 0.03909667953848839, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_k.bias": { + "min": -4.2564592361450195, + "max": 4.743135929107666, + "mean": -0.020397484302520752, + "std": 1.0097577571868896, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_v.weight": { + "min": -0.2459408938884735, + "max": 0.2083207219839096, + "mean": 4.4360454921843484e-05, + "std": 0.03396270051598549, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_v.bias": { + "min": -0.03462521731853485, + "max": 0.045053571462631226, + "mean": -2.1719199139624834e-05, + "std": 0.012641450390219688, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.weight": { + "min": -0.20202401280403137, + "max": 0.20743757486343384, + "mean": -2.9260227165650576e-05, + "std": 0.031020890921354294, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.bias": { + "min": -0.20072369277477264, + "max": 0.11369979381561279, + "mean": -0.002900277031585574, + "std": 0.03456325829029083, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.3.g": { + "min": 0.3669256269931793, + "max": 1.064845323562622, + "mean": 0.6706051230430603, + "std": 0.06665434688329697, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.weight": { + "min": -0.4000990390777588, + "max": 0.5037862062454224, + "mean": -3.870507498504594e-05, + "std": 0.04113040864467621, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.bias": { + "min": -0.12917247414588928, + "max": 0.026963019743561745, + "mean": -0.030557911843061447, + "std": 0.021937619894742966, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.4.4.ff.2.weight": { + "min": -0.4511619806289673, + "max": 0.4353387653827667, + "mean": 7.546078268205747e-05, + "std": 0.03489077836275101, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.4.4.ff.2.bias": { + "min": -0.26869964599609375, + "max": 0.07339140772819519, + "mean": -0.0010946399997919798, + "std": 0.023160062730312347, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.1.g": { + "min": 0.2875079810619354, + "max": 0.6899884343147278, + "mean": 0.5247476696968079, + "std": 0.04796215519309044, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_q.weight": { + "min": -0.22366264462471008, + "max": 0.2245350182056427, + "mean": 1.589955536474008e-05, + "std": 0.038949232548475266, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_q.bias": { + "min": -0.13696447014808655, + "max": 0.10982562601566315, + "mean": 0.0002473338390700519, + "std": 0.029272515326738358, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_k.weight": { + "min": -0.37620943784713745, + "max": 0.4390593469142914, + "mean": -9.372964996146038e-06, + "std": 0.039287250488996506, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_k.bias": { + "min": -3.8626632690429688, + "max": 5.021180629730225, + "mean": 0.009756950661540031, + "std": 0.8471038937568665, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_v.weight": { + "min": -0.2235114425420761, + "max": 0.2212144434452057, + "mean": -3.48434696206823e-07, + "std": 0.03441031649708748, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_v.bias": { + "min": -0.04396739602088928, + "max": 0.03608814626932144, + "mean": -0.00025925497175194323, + "std": 0.012080671265721321, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.weight": { + "min": -0.2138509899377823, + "max": 0.18955761194229126, + "mean": -1.6947185940807685e-05, + "std": 0.03153672814369202, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.bias": { + "min": -0.18172238767147064, + "max": 0.12127514183521271, + "mean": -0.0023971181362867355, + "std": 0.04130159318447113, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.3.g": { + "min": 0.42289772629737854, + "max": 0.9483197927474976, + "mean": 0.6628358364105225, + "std": 0.05716627463698387, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.weight": { + "min": -0.37180185317993164, + "max": 0.47763875126838684, + "mean": -8.19972192402929e-05, + "std": 0.040889617055654526, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.bias": { + "min": -0.209408238530159, + "max": 0.027359697967767715, + "mean": -0.0302574522793293, + "std": 0.021417709067463875, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.5.4.ff.2.weight": { + "min": -0.3422113060951233, + "max": 0.7372819185256958, + "mean": 8.242652984336019e-05, + "std": 0.034766409546136856, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.5.4.ff.2.bias": { + "min": -0.2412174493074417, + "max": 0.05068235844373703, + "mean": -0.0011914315400645137, + "std": 0.020485328510403633, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.1.g": { + "min": 0.30587607622146606, + "max": 0.6579968333244324, + "mean": 0.5253006219863892, + "std": 0.0464390330016613, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_q.weight": { + "min": -0.30547264218330383, + "max": 0.21810249984264374, + "mean": 6.997188756940886e-05, + "std": 0.039497073739767075, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_q.bias": { + "min": -0.14979463815689087, + "max": 0.13157697021961212, + "mean": 0.00032728962833061814, + "std": 0.030529892072081566, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_k.weight": { + "min": -0.25832319259643555, + "max": 0.20298458635807037, + "mean": 3.122862472082488e-05, + "std": 0.039488088339567184, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_k.bias": { + "min": -2.3464906215667725, + "max": 2.3862874507904053, + "mean": -0.0262940414249897, + "std": 0.45072564482688904, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_v.weight": { + "min": -0.18955294787883759, + "max": 0.211393803358078, + "mean": 3.7051289837108925e-05, + "std": 0.03479388728737831, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_v.bias": { + "min": -0.03182046860456467, + "max": 0.03580700233578682, + "mean": -0.0001974685292225331, + "std": 0.012292041443288326, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.weight": { + "min": -0.18930117785930634, + "max": 0.17112135887145996, + "mean": -6.836307875346392e-05, + "std": 0.03217054903507233, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.bias": { + "min": -0.14002393186092377, + "max": 0.1378386914730072, + "mean": -0.0025169737637043, + "std": 0.05131695047020912, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.3.g": { + "min": 0.4669981598854065, + "max": 0.9623145461082458, + "mean": 0.669116199016571, + "std": 0.053326528519392014, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.weight": { + "min": -0.324962854385376, + "max": 0.3098026514053345, + "mean": -9.876448530121706e-07, + "std": 0.0409456230700016, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.bias": { + "min": -0.12541106343269348, + "max": 0.025640888139605522, + "mean": -0.030711790546774864, + "std": 0.019869431853294373, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.6.4.ff.2.weight": { + "min": -0.44164079427719116, + "max": 0.4474758803844452, + "mean": 9.588097600499168e-05, + "std": 0.03511932119727135, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.6.4.ff.2.bias": { + "min": -0.2256106585264206, + "max": 0.052044421434402466, + "mean": -0.0011865352280437946, + "std": 0.018494844436645508, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.1.g": { + "min": 0.33912554383277893, + "max": 0.7450283169746399, + "mean": 0.558834433555603, + "std": 0.041677191853523254, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_q.weight": { + "min": -0.27382639050483704, + "max": 0.27962929010391235, + "mean": 2.034128556260839e-05, + "std": 0.0410577729344368, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_q.bias": { + "min": -0.13741885125637054, + "max": 0.14038565754890442, + "mean": 0.0004929338465444744, + "std": 0.02668425627052784, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_k.weight": { + "min": -0.49240002036094666, + "max": 0.35733160376548767, + "mean": 8.901266846805811e-05, + "std": 0.04069547727704048, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_k.bias": { + "min": -2.3072962760925293, + "max": 1.7529240846633911, + "mean": -0.021147169172763824, + "std": 0.5008938312530518, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_v.weight": { + "min": -0.21894769370555878, + "max": 0.19816064834594727, + "mean": -4.0161168726626784e-05, + "std": 0.03423343971371651, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_v.bias": { + "min": -0.04133184999227524, + "max": 0.03901350870728493, + "mean": -0.00013613827468361706, + "std": 0.012887353077530861, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.weight": { + "min": -0.17847225069999695, + "max": 0.1837986409664154, + "mean": 4.7998124500736594e-05, + "std": 0.031556759029626846, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.bias": { + "min": -0.180707648396492, + "max": 0.18469232320785522, + "mean": -0.0022159582003951073, + "std": 0.05485893413424492, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.3.g": { + "min": 0.4741988480091095, + "max": 1.0330065488815308, + "mean": 0.6454803347587585, + "std": 0.05105094239115715, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.weight": { + "min": -0.2723560929298401, + "max": 0.3096334636211395, + "mean": 0.00011242127220612019, + "std": 0.040681805461645126, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.bias": { + "min": -0.10577475279569626, + "max": 0.026752889156341553, + "mean": -0.029537281021475792, + "std": 0.01797310821712017, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.7.4.ff.2.weight": { + "min": -0.3403210937976837, + "max": 0.33086487650871277, + "mean": 5.282106576487422e-05, + "std": 0.034412968903779984, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.7.4.ff.2.bias": { + "min": -0.18259213864803314, + "max": 0.04268056899309158, + "mean": -0.0010635886574164033, + "std": 0.017230909317731857, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.1.g": { + "min": 0.32514795660972595, + "max": 0.6914159655570984, + "mean": 0.5113943219184875, + "std": 0.03739636018872261, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_q.weight": { + "min": -0.2348308116197586, + "max": 0.22631730139255524, + "mean": -3.621048017521389e-05, + "std": 0.0391756109893322, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_q.bias": { + "min": -0.11563856154680252, + "max": 0.13239268958568573, + "mean": 0.00015192970749922097, + "std": 0.029222996905446053, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_k.weight": { + "min": -0.35409149527549744, + "max": 0.2863385081291199, + "mean": 6.707018656015862e-06, + "std": 0.03924466669559479, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_k.bias": { + "min": -4.1504130363464355, + "max": 3.5592541694641113, + "mean": -0.011647488921880722, + "std": 0.6845048069953918, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_v.weight": { + "min": -0.21134592592716217, + "max": 0.21000461280345917, + "mean": 3.47579552908428e-05, + "std": 0.03448459133505821, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_v.bias": { + "min": -0.036000702530145645, + "max": 0.04817511513829231, + "mean": 0.0007898924523033202, + "std": 0.012873834930360317, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.weight": { + "min": -0.2113579511642456, + "max": 0.19389942288398743, + "mean": -1.0706971806939691e-06, + "std": 0.0316954106092453, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.bias": { + "min": -0.1872350424528122, + "max": 0.1779821664094925, + "mean": -0.002844380447641015, + "std": 0.058656178414821625, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.3.g": { + "min": 0.4746103286743164, + "max": 1.0489076375961304, + "mean": 0.6516687870025635, + "std": 0.05057830363512039, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.weight": { + "min": -0.24878337979316711, + "max": 0.3296516239643097, + "mean": 0.00018073963292408735, + "std": 0.04057016968727112, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.bias": { + "min": -0.12595486640930176, + "max": 0.02493392489850521, + "mean": -0.030515050515532494, + "std": 0.01764742285013199, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.8.4.ff.2.weight": { + "min": -0.4225960969924927, + "max": 0.4839133322238922, + "mean": 1.030291969073005e-06, + "std": 0.035397231578826904, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.8.4.ff.2.bias": { + "min": -0.1520412415266037, + "max": 0.043631311506032944, + "mean": 4.209935286780819e-05, + "std": 0.014901721850037575, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.1.g": { + "min": 0.31559863686561584, + "max": 0.686523973941803, + "mean": 0.553006649017334, + "std": 0.040904585272073746, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_q.weight": { + "min": -0.20726847648620605, + "max": 0.22089692950248718, + "mean": 3.191033465554938e-05, + "std": 0.03829946741461754, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_q.bias": { + "min": -0.13833385705947876, + "max": 0.11308565735816956, + "mean": 2.6655456167645752e-05, + "std": 0.025857754051685333, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_k.weight": { + "min": -0.4046614170074463, + "max": 0.37271684408187866, + "mean": 2.56894181802636e-05, + "std": 0.0381796769797802, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_k.bias": { + "min": -3.7873597145080566, + "max": 2.881237506866455, + "mean": 0.0011979229748249054, + "std": 0.5181517601013184, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_v.weight": { + "min": -0.20434829592704773, + "max": 0.19823738932609558, + "mean": 2.9684193577850237e-05, + "std": 0.03429735451936722, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_v.bias": { + "min": -0.050780050456523895, + "max": 0.040064383298158646, + "mean": -0.00042128204950131476, + "std": 0.01341989729553461, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.weight": { + "min": -0.1970871537923813, + "max": 0.20266157388687134, + "mean": -1.2426969988155179e-05, + "std": 0.031805407255887985, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.bias": { + "min": -0.1938190907239914, + "max": 0.19595396518707275, + "mean": -0.0029727788642048836, + "std": 0.06256895512342453, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.3.g": { + "min": 0.34895268082618713, + "max": 1.0913121700286865, + "mean": 0.6674203276634216, + "std": 0.056132975965738297, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.weight": { + "min": -0.22646191716194153, + "max": 0.25265538692474365, + "mean": 0.0003584488877095282, + "std": 0.040759678930044174, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.bias": { + "min": -0.09146817028522491, + "max": 0.04364684969186783, + "mean": -0.030097611248493195, + "std": 0.017646051943302155, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.9.4.ff.2.weight": { + "min": -0.35469669103622437, + "max": 0.30548718571662903, + "mean": -4.469315172173083e-05, + "std": 0.03712276369333267, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.9.4.ff.2.bias": { + "min": -0.1623995155096054, + "max": 0.06374479830265045, + "mean": -8.042766421567649e-05, + "std": 0.01944616436958313, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.1.g": { + "min": 0.34871119260787964, + "max": 0.7271286249160767, + "mean": 0.5425379872322083, + "std": 0.03944627195596695, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_q.weight": { + "min": -0.2201070785522461, + "max": 0.2242431491613388, + "mean": -1.1387233826098964e-05, + "std": 0.03923100233078003, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_q.bias": { + "min": -0.11890711635351181, + "max": 0.1713198721408844, + "mean": 0.0002833662729244679, + "std": 0.025163158774375916, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_k.weight": { + "min": -0.24783332645893097, + "max": 0.30217495560646057, + "mean": -3.6862991692032665e-05, + "std": 0.038930460810661316, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_k.bias": { + "min": -3.520315170288086, + "max": 3.7306737899780273, + "mean": 0.015852145850658417, + "std": 0.7850235104560852, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_v.weight": { + "min": -0.21981129050254822, + "max": 0.23816066980361938, + "mean": -1.3107633094477933e-05, + "std": 0.036303482949733734, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_v.bias": { + "min": -0.04740596562623978, + "max": 0.05159047618508339, + "mean": 0.000481397844851017, + "std": 0.013528619892895222, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.weight": { + "min": -0.2151964157819748, + "max": 0.21832282841205597, + "mean": 5.642603355227038e-05, + "std": 0.03361587971448898, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.bias": { + "min": -0.2122570425271988, + "max": 0.23222938179969788, + "mean": -0.005098365712910891, + "std": 0.06190234050154686, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.3.g": { + "min": 0.36193394660949707, + "max": 1.1087924242019653, + "mean": 0.6995820999145508, + "std": 0.05450976639986038, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.weight": { + "min": -0.23606520891189575, + "max": 0.24584993720054626, + "mean": 0.00046336432569660246, + "std": 0.041269052773714066, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.bias": { + "min": -0.09852692484855652, + "max": 0.06841564178466797, + "mean": -0.0314490832388401, + "std": 0.01816665753722191, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.10.4.ff.2.weight": { + "min": -0.30322569608688354, + "max": 0.3532632291316986, + "mean": -8.268543751910329e-05, + "std": 0.04027474299073219, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.10.4.ff.2.bias": { + "min": -0.15293245017528534, + "max": 0.1503082662820816, + "mean": 0.0002610071678645909, + "std": 0.023066464811563492, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.1.g": { + "min": 0.9987825155258179, + "max": 1.011022686958313, + "mean": 1.0016167163848877, + "std": 0.004121079575270414, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_q.weight": { + "min": -0.03126484900712967, + "max": 0.03125990182161331, + "mean": -1.9292880097054876e-05, + "std": 0.0180410947650671, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_q.bias": { + "min": -0.031222796067595482, + "max": 0.030990226194262505, + "mean": -0.001084181945770979, + "std": 0.017950553447008133, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_k.weight": { + "min": -0.03126567602157593, + "max": 0.031269483268260956, + "mean": 3.546300376910949e-06, + "std": 0.018041500821709633, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_k.bias": { + "min": -0.03114791214466095, + "max": 0.03117155283689499, + "mean": 0.0003340535331517458, + "std": 0.018062960356473923, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.bias": { + "min": -0.0005971609498374164, + "max": 0.0006745979771949351, + "mean": 4.374485797598027e-06, + "std": 0.0001794710842659697, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.3.g": { + "min": 0.9978547096252441, + "max": 1.0122681856155396, + "mean": 1.0009429454803467, + "std": 0.0034361695870757103, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.weight": { + "min": -0.03340178728103638, + "max": 0.033508703112602234, + "mean": -6.2318931668414734e-06, + "std": 0.01804722100496292, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.bias": { + "min": -0.03293577954173088, + "max": 0.03327555954456329, + "mean": -0.00015042479208204895, + "std": 0.017954858019948006, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.11.4.ff.2.weight": { + "min": -0.00139134272467345, + "max": 0.0014818700728937984, + "mean": 1.7994759673456429e-06, + "std": 0.0002722168283071369, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.11.4.ff.2.bias": { + "min": -0.0005520335980691016, + "max": 0.0007331477245315909, + "mean": 7.149023986130487e-06, + "std": 0.0001629332109587267, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.1.g": { + "min": 0.3833008110523224, + "max": 0.7242851853370667, + "mean": 0.5809347033500671, + "std": 0.039344511926174164, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_q.weight": { + "min": -0.2398604303598404, + "max": 0.19741135835647583, + "mean": 2.61208933807211e-05, + "std": 0.037466324865818024, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_q.bias": { + "min": -0.1193285658955574, + "max": 0.16746975481510162, + "mean": 0.0009843853767961264, + "std": 0.027611562982201576, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_k.weight": { + "min": -0.24755319952964783, + "max": 0.5020493268966675, + "mean": -5.023340054322034e-05, + "std": 0.037623729556798935, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_k.bias": { + "min": -3.959080934524536, + "max": 3.785468339920044, + "mean": -0.003608043771237135, + "std": 0.6828969120979309, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_v.weight": { + "min": -0.2280745655298233, + "max": 0.25265711545944214, + "mean": -1.1726486263796687e-05, + "std": 0.037434931844472885, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_v.bias": { + "min": -0.07189386337995529, + "max": 0.08095899969339371, + "mean": -0.0005116118700243533, + "std": 0.015669817104935646, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.weight": { + "min": -0.22852574288845062, + "max": 0.2589001953601837, + "mean": -2.8789245334337465e-05, + "std": 0.035421740263700485, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.bias": { + "min": -0.20139215886592865, + "max": 0.21579185128211975, + "mean": -0.005532890558242798, + "std": 0.06838470697402954, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.3.g": { + "min": 0.40495166182518005, + "max": 1.1977423429489136, + "mean": 0.7382426857948303, + "std": 0.05618907883763313, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.weight": { + "min": -0.22189897298812866, + "max": 0.24627524614334106, + "mean": 0.0005210949457250535, + "std": 0.0413360670208931, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.bias": { + "min": -0.10370241105556488, + "max": 0.024191563948988914, + "mean": -0.03269057348370552, + "std": 0.018939778208732605, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.12.4.ff.2.weight": { + "min": -0.45156151056289673, + "max": 0.42444875836372375, + "mean": -0.00043494877172634006, + "std": 0.046896398067474365, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.12.4.ff.2.bias": { + "min": -0.25261297821998596, + "max": 0.47218039631843567, + "mean": 0.0032064011320471764, + "std": 0.0446014478802681, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.0.weight": { + "min": -0.3172667622566223, + "max": 0.33354270458221436, + "mean": -2.519888585084118e-05, + "std": 0.021287826821208, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.13.1.g": { + "min": 0.3245178461074829, + "max": 0.6904165148735046, + "mean": 0.5711733102798462, + "std": 0.04502657428383827, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_q.weight": { + "min": -0.16521431505680084, + "max": 0.1752052754163742, + "mean": -4.8754882300272584e-05, + "std": 0.033182479441165924, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_q.bias": { + "min": -0.18773159384727478, + "max": 0.14384877681732178, + "mean": 3.672283492051065e-05, + "std": 0.02975340373814106, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_k.weight": { + "min": -0.38243839144706726, + "max": 0.24725475907325745, + "mean": -9.841056453296915e-06, + "std": 0.03276367485523224, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_k.bias": { + "min": -3.6714818477630615, + "max": 3.3041720390319824, + "mean": -0.014343326911330223, + "std": 0.9862688779830933, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_v.weight": { + "min": -0.23551921546459198, + "max": 0.24833251535892487, + "mean": -1.8171514966525137e-05, + "std": 0.041698355227708817, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_v.bias": { + "min": -0.07285058498382568, + "max": 0.1551419198513031, + "mean": 0.0006671739974990487, + "std": 0.02518472634255886, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.weight": { + "min": -0.26684004068374634, + "max": 0.2486322820186615, + "mean": -1.5217347026919015e-05, + "std": 0.040139369666576385, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.bias": { + "min": -0.19041800498962402, + "max": 0.19548022747039795, + "mean": -0.001239710720255971, + "std": 0.06670945882797241, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.3.g": { + "min": 0.3291718661785126, + "max": 1.0067707300186157, + "mean": 0.7195272445678711, + "std": 0.053192976862192154, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.weight": { + "min": -0.23261909186840057, + "max": 0.24629585444927216, + "mean": 0.0001829106913646683, + "std": 0.04090041667222977, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.bias": { + "min": -0.11500220745801926, + "max": 0.01902289316058159, + "mean": -0.042502518743276596, + "std": 0.01891784742474556, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.13.4.ff.2.weight": { + "min": -0.3915143311023712, + "max": 0.4093465507030487, + "mean": -2.1941355953458697e-05, + "std": 0.04853365942835808, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.13.4.ff.2.bias": { + "min": -0.6959867477416992, + "max": 0.41447487473487854, + "mean": 0.0008487096056342125, + "std": 0.06040440872311592, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.0.weight": { + "min": -0.0013131406158208847, + "max": 1.000697135925293, + "mean": 0.00048820037045516074, + "std": 0.022089475765824318, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.14.1.g": { + "min": 0.9987786412239075, + "max": 1.0108789205551147, + "mean": 1.0015242099761963, + "std": 0.003978394437581301, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_q.weight": { + "min": -0.03125932812690735, + "max": 0.031260255724191666, + "mean": -2.101710924762301e-05, + "std": 0.018032435327768326, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_q.bias": { + "min": -0.031216789036989212, + "max": 0.0312344953417778, + "mean": -0.0006770212785340846, + "std": 0.017827019095420837, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_k.weight": { + "min": -0.03126361221075058, + "max": 0.03126442804932594, + "mean": -8.826009434415027e-06, + "std": 0.018031461164355278, + "sparsity": 9.5367431640625e-07, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_k.bias": { + "min": -0.031229715794324875, + "max": 0.031247057020664215, + "mean": -0.0007297845440916717, + "std": 0.01794196106493473, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.bias": { + "min": -0.0004946183180436492, + "max": 0.00040109679684974253, + "mean": -3.799516889557708e-06, + "std": 0.00014799994823988527, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.3.g": { + "min": 0.9972319006919861, + "max": 1.0116411447525024, + "mean": 1.0005743503570557, + "std": 0.0034592244774103165, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.weight": { + "min": -0.03315997123718262, + "max": 0.032729245722293854, + "mean": -2.570214064689935e-06, + "std": 0.018028665333986282, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.bias": { + "min": -0.03235220909118652, + "max": 0.03128715977072716, + "mean": -0.00045961630530655384, + "std": 0.018038177862763405, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.14.4.ff.2.weight": { + "min": -0.0016143623506650329, + "max": 0.001427292707376182, + "mean": -1.0927603852906032e-06, + "std": 0.00026996160158887506, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.14.4.ff.2.bias": { + "min": -0.00045358933857642114, + "max": 0.00036658692988567054, + "mean": -3.5024249882553704e-06, + "std": 0.0001358992449240759, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.0.weight": { + "min": -0.23466038703918457, + "max": 0.2728899419307709, + "mean": 6.680695605609799e-06, + "std": 0.018810251727700233, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.15.1.g": { + "min": 0.3215275704860687, + "max": 0.6988651752471924, + "mean": 0.5818086862564087, + "std": 0.04628920555114746, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_q.weight": { + "min": -0.18249788880348206, + "max": 0.1985490918159485, + "mean": -1.1619875294854864e-05, + "std": 0.0331842340528965, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_q.bias": { + "min": -0.16120854020118713, + "max": 0.12988702952861786, + "mean": -0.0010746754705905914, + "std": 0.034188635647296906, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_k.weight": { + "min": -0.3333602249622345, + "max": 0.31210559606552124, + "mean": -1.0246277270198334e-05, + "std": 0.03223477676510811, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_k.bias": { + "min": -7.836638927459717, + "max": 8.800041198730469, + "mean": 0.09370891749858856, + "std": 1.6243042945861816, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_v.weight": { + "min": -0.23471659421920776, + "max": 0.24255934357643127, + "mean": 4.1660623537609354e-05, + "std": 0.04085636883974075, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_v.bias": { + "min": -0.07628928869962692, + "max": 0.06604960560798645, + "mean": 0.0004821753827854991, + "std": 0.01943657174706459, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.weight": { + "min": -0.24707570672035217, + "max": 0.2350512593984604, + "mean": -3.330966137582436e-06, + "std": 0.03943110629916191, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.bias": { + "min": -0.16370660066604614, + "max": 0.16159522533416748, + "mean": 0.0016214787028729916, + "std": 0.06530040502548218, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.3.g": { + "min": 0.556998610496521, + "max": 0.9505069851875305, + "mean": 0.7131754159927368, + "std": 0.04095931351184845, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.weight": { + "min": -0.22923102974891663, + "max": 0.25587573647499084, + "mean": -4.568279109662399e-05, + "std": 0.040574610233306885, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.bias": { + "min": -0.13533200323581696, + "max": 0.022116411477327347, + "mean": -0.041375163942575455, + "std": 0.018435189500451088, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.15.4.ff.2.weight": { + "min": -0.42361417412757874, + "max": 0.39315521717071533, + "mean": -4.420744517119601e-06, + "std": 0.047783900052309036, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.15.4.ff.2.bias": { + "min": -0.6098850965499878, + "max": 0.6541793942451477, + "mean": 0.001589474268257618, + "std": 0.056938592344522476, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.0.weight": { + "min": -0.2520405650138855, + "max": 0.3211195170879364, + "mean": -6.1747768995701335e-06, + "std": 0.019613485783338547, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.16.1.g": { + "min": 0.35947033762931824, + "max": 0.6870434284210205, + "mean": 0.5708057880401611, + "std": 0.04320356622338295, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_q.weight": { + "min": -0.22096499800682068, + "max": 0.1776382476091385, + "mean": -3.44411309924908e-05, + "std": 0.034298770129680634, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_q.bias": { + "min": -0.16386361420154572, + "max": 0.23379802703857422, + "mean": 0.0003647217818070203, + "std": 0.032876912504434586, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_k.weight": { + "min": -0.2648993730545044, + "max": 0.2407570779323578, + "mean": -5.283746577333659e-05, + "std": 0.03389748930931091, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_k.bias": { + "min": -4.875531196594238, + "max": 5.112789630889893, + "mean": 0.04403312876820564, + "std": 1.231998324394226, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_v.weight": { + "min": -0.24717208743095398, + "max": 0.2512055039405823, + "mean": 7.22141849109903e-05, + "std": 0.043986547738313675, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_v.bias": { + "min": -0.06276638805866241, + "max": 0.054656121879816055, + "mean": 0.0006459522992372513, + "std": 0.017198164016008377, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.weight": { + "min": -0.2877632677555084, + "max": 0.2726806104183197, + "mean": -5.0024795200442895e-05, + "std": 0.042984671890735626, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.bias": { + "min": -0.16170376539230347, + "max": 0.1710934340953827, + "mean": -0.0028864555060863495, + "std": 0.05931045860052109, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.3.g": { + "min": 0.51991868019104, + "max": 0.9398472905158997, + "mean": 0.7137647867202759, + "std": 0.03922666609287262, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.weight": { + "min": -0.23831400275230408, + "max": 0.2492961287498474, + "mean": 0.00046471404493786395, + "std": 0.040453460067510605, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.bias": { + "min": -0.14562036097049713, + "max": 0.04111756384372711, + "mean": -0.039718322455883026, + "std": 0.02059181034564972, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.16.4.ff.2.weight": { + "min": -0.5351076126098633, + "max": 0.5854408740997314, + "mean": 5.962188879493624e-06, + "std": 0.0488593615591526, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.16.4.ff.2.bias": { + "min": -0.5212635397911072, + "max": 0.4954894483089447, + "mean": 0.0023677186109125614, + "std": 0.05354826897382736, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.0.weight": { + "min": -0.27395325899124146, + "max": 0.31585943698883057, + "mean": 1.8985367660206975e-06, + "std": 0.020050065591931343, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.17.1.g": { + "min": 0.3660656809806824, + "max": 0.7167491316795349, + "mean": 0.593307375907898, + "std": 0.04627520218491554, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_q.weight": { + "min": -0.21157211065292358, + "max": 0.19981449842453003, + "mean": 3.063139592995867e-05, + "std": 0.03486718237400055, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_q.bias": { + "min": -0.1879485547542572, + "max": 0.2043510377407074, + "mean": 0.0009530138340778649, + "std": 0.031568389385938644, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_k.weight": { + "min": -0.29089149832725525, + "max": 0.341105580329895, + "mean": -4.692538641393185e-05, + "std": 0.03458765521645546, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_k.bias": { + "min": -3.893813371658325, + "max": 3.4017703533172607, + "mean": 0.014513500966131687, + "std": 0.8598799705505371, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_v.weight": { + "min": -0.22526344656944275, + "max": 0.250789076089859, + "mean": -3.7296154005161952e-06, + "std": 0.042229536920785904, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_v.bias": { + "min": -0.05549817904829979, + "max": 0.046731892973184586, + "mean": -2.1666113752871752e-05, + "std": 0.0158494021743536, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.weight": { + "min": -0.29372450709342957, + "max": 0.2908160388469696, + "mean": -7.59748127165949e-06, + "std": 0.041944604367017746, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.bias": { + "min": -0.12536406517028809, + "max": 0.2601471245288849, + "mean": -0.0032426435500383377, + "std": 0.05318090319633484, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.3.g": { + "min": 0.45628464221954346, + "max": 0.8507043719291687, + "mean": 0.7057910561561584, + "std": 0.03590774908661842, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.weight": { + "min": -0.5123029351234436, + "max": 0.34838762879371643, + "mean": 0.0003429077914915979, + "std": 0.04019884020090103, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.bias": { + "min": -0.1866319328546524, + "max": 0.039536003023386, + "mean": -0.03940858319401741, + "std": 0.021406862884759903, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.17.4.ff.2.weight": { + "min": -0.5465707778930664, + "max": 0.5584931969642639, + "mean": -7.126475975383073e-05, + "std": 0.050734348595142365, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.17.4.ff.2.bias": { + "min": -0.5138925909996033, + "max": 0.6670938730239868, + "mean": 0.0024418262764811516, + "std": 0.04960782080888748, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.0.weight": { + "min": -0.33276569843292236, + "max": 0.26628994941711426, + "mean": 3.292404471721966e-06, + "std": 0.01938711293041706, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.18.1.g": { + "min": 0.3219706416130066, + "max": 0.7718862295150757, + "mean": 0.651161789894104, + "std": 0.04554183781147003, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_q.weight": { + "min": -0.2507328987121582, + "max": 0.22062398493289948, + "mean": -2.0154016056039836e-06, + "std": 0.03650148585438728, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_q.bias": { + "min": -0.3283964991569519, + "max": 0.2880261540412903, + "mean": -0.0006875221151858568, + "std": 0.038663797080516815, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_k.weight": { + "min": -0.3113596737384796, + "max": 0.37169572710990906, + "mean": 6.504646444227546e-05, + "std": 0.03624209389090538, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_k.bias": { + "min": -4.737742900848389, + "max": 5.83281946182251, + "mean": 0.03801126033067703, + "std": 1.4163931608200073, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_v.weight": { + "min": -0.2227693796157837, + "max": 0.2069622278213501, + "mean": -7.526973786298186e-05, + "std": 0.042485106736421585, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_v.bias": { + "min": -0.07798711210489273, + "max": 0.05173616483807564, + "mean": -0.0009264935506507754, + "std": 0.016420088708400726, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.weight": { + "min": -0.3309612274169922, + "max": 0.3296358287334442, + "mean": -4.774779426952591e-06, + "std": 0.04279141128063202, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.bias": { + "min": -0.28600984811782837, + "max": 0.11250722408294678, + "mean": -0.0012054404942318797, + "std": 0.04702861234545708, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.3.g": { + "min": 0.4860813617706299, + "max": 0.8933811783790588, + "mean": 0.7376744747161865, + "std": 0.038892824202775955, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.weight": { + "min": -0.36275342106819153, + "max": 0.2756327986717224, + "mean": 5.113358929520473e-05, + "std": 0.04064434394240379, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.bias": { + "min": -0.2486657202243805, + "max": 0.046376701444387436, + "mean": -0.03928756341338158, + "std": 0.023350302129983902, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.18.4.ff.2.weight": { + "min": -0.6290910840034485, + "max": 0.5994174480438232, + "mean": -6.010006836731918e-05, + "std": 0.0531165786087513, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.18.4.ff.2.bias": { + "min": -0.712557315826416, + "max": 0.26695698499679565, + "mean": 0.000916715245693922, + "std": 0.051312319934368134, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.0.weight": { + "min": -0.3435560464859009, + "max": 0.3038403391838074, + "mean": 2.054806600426673e-07, + "std": 0.01913570426404476, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.19.1.g": { + "min": 0.34980928897857666, + "max": 0.7884078621864319, + "mean": 0.6389412879943848, + "std": 0.04949204996228218, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_q.weight": { + "min": -0.2064303159713745, + "max": 0.2077268660068512, + "mean": -5.987969052512199e-05, + "std": 0.03769605979323387, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_q.bias": { + "min": -0.25974684953689575, + "max": 0.26921483874320984, + "mean": -0.000399288343032822, + "std": 0.04469470679759979, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_k.weight": { + "min": -0.35545018315315247, + "max": 0.32378923892974854, + "mean": -6.928052243893035e-06, + "std": 0.03720466047525406, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_k.bias": { + "min": -5.283975601196289, + "max": 4.222393035888672, + "mean": -0.0264443326741457, + "std": 1.0090056657791138, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_v.weight": { + "min": -0.23976586759090424, + "max": 0.24442994594573975, + "mean": -2.508235047571361e-05, + "std": 0.04320976510643959, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_v.bias": { + "min": -0.06259545683860779, + "max": 0.0569254532456398, + "mean": 0.00034189436701126397, + "std": 0.014161717146635056, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.weight": { + "min": -0.4372391402721405, + "max": 0.37368500232696533, + "mean": 1.4562616343027912e-05, + "std": 0.044121067970991135, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.bias": { + "min": -0.09685619175434113, + "max": 0.17668433487415314, + "mean": -0.0006592039717361331, + "std": 0.035167545080184937, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.3.g": { + "min": 0.42172640562057495, + "max": 1.0772342681884766, + "mean": 0.7485133409500122, + "std": 0.04247161000967026, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.weight": { + "min": -0.26711001992225647, + "max": 0.2980104982852936, + "mean": -7.953734166221693e-05, + "std": 0.04080444946885109, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.bias": { + "min": -0.18652470409870148, + "max": 0.04387153312563896, + "mean": -0.03684595599770546, + "std": 0.025674043223261833, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.19.4.ff.2.weight": { + "min": -0.4576263427734375, + "max": 0.488967627286911, + "mean": 4.3991476559313014e-05, + "std": 0.05420954152941704, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.19.4.ff.2.bias": { + "min": -0.287752240896225, + "max": 0.5537111759185791, + "mean": -0.0008832515450194478, + "std": 0.0479048416018486, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.0.weight": { + "min": -0.29307857155799866, + "max": 0.32305020093917847, + "mean": 6.496340574813075e-06, + "std": 0.01996980607509613, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.20.1.g": { + "min": 0.29093778133392334, + "max": 0.7654404640197754, + "mean": 0.6508903503417969, + "std": 0.05225415527820587, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_q.weight": { + "min": -0.2440621256828308, + "max": 0.26225581765174866, + "mean": -5.966384833300253e-06, + "std": 0.03961286321282387, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_q.bias": { + "min": -0.268706738948822, + "max": 0.20074717700481415, + "mean": -0.0008819116046652198, + "std": 0.05185216665267944, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_k.weight": { + "min": -0.2733410894870758, + "max": 0.2549380958080292, + "mean": 4.216280103719328e-06, + "std": 0.03870992735028267, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_k.bias": { + "min": -13.020317077636719, + "max": 16.015220642089844, + "mean": 0.033375781029462814, + "std": 1.9953062534332275, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_v.weight": { + "min": -0.2079249769449234, + "max": 0.22674520313739777, + "mean": -7.217413804028183e-05, + "std": 0.04055381566286087, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_v.bias": { + "min": -0.06965012848377228, + "max": 0.06350152939558029, + "mean": 0.00015418700058944523, + "std": 0.014755439944565296, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.weight": { + "min": -0.4655463695526123, + "max": 0.3209993243217468, + "mean": 1.953401260834653e-05, + "std": 0.04058877378702164, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.bias": { + "min": -0.06434516608715057, + "max": 0.1157260537147522, + "mean": 0.001194344600662589, + "std": 0.02471684291958809, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.3.g": { + "min": 0.37466296553611755, + "max": 0.9391067624092102, + "mean": 0.7509991526603699, + "std": 0.04050418362021446, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.weight": { + "min": -0.28077101707458496, + "max": 0.274548202753067, + "mean": -0.00016862244228832424, + "std": 0.04099500924348831, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.bias": { + "min": -0.19967925548553467, + "max": 0.0508696548640728, + "mean": -0.03204797953367233, + "std": 0.025167953222990036, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.20.4.ff.2.weight": { + "min": -0.6607509851455688, + "max": 0.5379750728607178, + "mean": -4.8667719966033474e-05, + "std": 0.052846137434244156, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.20.4.ff.2.bias": { + "min": -0.1939390003681183, + "max": 0.584657609462738, + "mean": -0.0005122774746268988, + "std": 0.041145551949739456, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.0.weight": { + "min": -0.41793951392173767, + "max": 0.37214717268943787, + "mean": 6.048314844520064e-06, + "std": 0.02162175066769123, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.21.1.g": { + "min": 0.21421198546886444, + "max": 0.7522769570350647, + "mean": 0.6496115922927856, + "std": 0.054447393864393234, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_q.weight": { + "min": -0.21056805551052094, + "max": 0.1966959536075592, + "mean": 4.008851828984916e-05, + "std": 0.039464544504880905, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_q.bias": { + "min": -0.33072784543037415, + "max": 0.26050281524658203, + "mean": -0.003235320094972849, + "std": 0.056362900882959366, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_k.weight": { + "min": -0.20648598670959473, + "max": 0.2557448148727417, + "mean": 5.435877392301336e-05, + "std": 0.038566704839468, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_k.bias": { + "min": -6.270581245422363, + "max": 6.962486743927002, + "mean": 0.048468317836523056, + "std": 1.3885526657104492, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_v.weight": { + "min": -0.21042834222316742, + "max": 0.23116129636764526, + "mean": -5.202562988415593e-06, + "std": 0.04131306707859039, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_v.bias": { + "min": -0.044061992317438126, + "max": 0.03610403463244438, + "mean": 4.031957359984517e-06, + "std": 0.012803297489881516, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.weight": { + "min": -0.39820992946624756, + "max": 0.3451625406742096, + "mean": -5.5655600590398535e-05, + "std": 0.04238949343562126, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.bias": { + "min": -0.05527956411242485, + "max": 0.06314276903867722, + "mean": 0.00036968549829907715, + "std": 0.01868215762078762, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.3.g": { + "min": 0.3502121865749359, + "max": 1.0526388883590698, + "mean": 0.789475679397583, + "std": 0.049056656658649445, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.weight": { + "min": -0.333749383687973, + "max": 0.386434406042099, + "mean": -0.00016950398276094347, + "std": 0.04148067533969879, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.bias": { + "min": -0.15795546770095825, + "max": 0.05914008617401123, + "mean": -0.031855080276727676, + "std": 0.025188777595758438, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.21.4.ff.2.weight": { + "min": -0.6976608633995056, + "max": 0.4709860682487488, + "mean": -9.084228804567829e-05, + "std": 0.051792342215776443, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.21.4.ff.2.bias": { + "min": -0.24932992458343506, + "max": 0.3299875855445862, + "mean": -0.00024624879006296396, + "std": 0.04149326682090759, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.0.weight": { + "min": -0.2875395119190216, + "max": 0.3506205677986145, + "mean": -2.1794317035528366e-06, + "std": 0.02423883229494095, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.22.1.g": { + "min": 0.19665004312992096, + "max": 0.7845895886421204, + "mean": 0.6703099608421326, + "std": 0.05872485041618347, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_q.weight": { + "min": -0.22986678779125214, + "max": 0.23209868371486664, + "mean": -1.9775907276198268e-05, + "std": 0.040440451353788376, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_q.bias": { + "min": -0.22065043449401855, + "max": 0.2417624443769455, + "mean": 0.0007816089782863855, + "std": 0.05589631199836731, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_k.weight": { + "min": -0.21658743917942047, + "max": 0.22758929431438446, + "mean": -7.156423816923052e-05, + "std": 0.03937661275267601, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_k.bias": { + "min": -8.943953514099121, + "max": 9.107547760009766, + "mean": -0.0012157298624515533, + "std": 1.8536982536315918, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_v.weight": { + "min": -0.2707418203353882, + "max": 0.2602587938308716, + "mean": 4.357028228696436e-05, + "std": 0.03840764984488487, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_v.bias": { + "min": -0.05789529159665108, + "max": 0.05795900523662567, + "mean": 0.0003505878266878426, + "std": 0.014736429788172245, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.weight": { + "min": -0.2662392258644104, + "max": 0.2892150580883026, + "mean": -6.152272544568405e-05, + "std": 0.03907401114702225, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.bias": { + "min": -0.04396943002939224, + "max": 0.037484679371118546, + "mean": -8.678687299834564e-05, + "std": 0.013375459238886833, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.3.g": { + "min": 0.3395363390445709, + "max": 1.100338101387024, + "mean": 0.863823413848877, + "std": 0.06409083306789398, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.weight": { + "min": -0.423621267080307, + "max": 0.4195392429828644, + "mean": 0.0003127713571302593, + "std": 0.04350290074944496, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.bias": { + "min": -0.21570223569869995, + "max": 0.17136934399604797, + "mean": -0.029504353180527687, + "std": 0.032010503113269806, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.22.4.ff.2.weight": { + "min": -0.602144181728363, + "max": 0.5620326995849609, + "mean": -0.00015219957276713103, + "std": 0.05344673991203308, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.22.4.ff.2.bias": { + "min": -0.17926719784736633, + "max": 0.37834614515304565, + "mean": 0.0013675567461177707, + "std": 0.037359848618507385, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.0.weight": { + "min": -0.39466091990470886, + "max": 0.36930760741233826, + "mean": 3.647102857939899e-05, + "std": 0.028620684519410133, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.23.1.g": { + "min": 0.2902662754058838, + "max": 0.832281231880188, + "mean": 0.7056034207344055, + "std": 0.06793806701898575, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_q.weight": { + "min": -0.9263004064559937, + "max": 1.0266234874725342, + "mean": -2.5708328394102864e-05, + "std": 0.04762601479887962, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_q.bias": { + "min": -0.8822629451751709, + "max": 0.8186339139938354, + "mean": -0.00031781112193129957, + "std": 0.09582255780696869, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_k.weight": { + "min": -0.27002349495887756, + "max": 0.24192620813846588, + "mean": -2.2872980480315164e-05, + "std": 0.03895563259720802, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_k.bias": { + "min": -23.84510040283203, + "max": 22.94961166381836, + "mean": -0.09204111993312836, + "std": 4.085866928100586, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_v.weight": { + "min": -0.22870811820030212, + "max": 0.24587669968605042, + "mean": -2.573069286881946e-05, + "std": 0.03863922879099846, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_v.bias": { + "min": -0.06067140772938728, + "max": 0.046225275844335556, + "mean": -0.0001460441417293623, + "std": 0.014704843983054161, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.weight": { + "min": -0.3391576111316681, + "max": 0.3760104775428772, + "mean": 7.383272532024421e-06, + "std": 0.040815357118844986, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.bias": { + "min": -0.04665788635611534, + "max": 0.19654953479766846, + "mean": 0.0002728282706812024, + "std": 0.013587887398898602, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.3.g": { + "min": 0.37436628341674805, + "max": 1.138013482093811, + "mean": 0.8901113271713257, + "std": 0.06415355205535889, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.weight": { + "min": -0.44819676876068115, + "max": 0.5436740517616272, + "mean": 2.450778629281558e-05, + "std": 0.04556773602962494, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.bias": { + "min": -0.2250596135854721, + "max": 0.08822774887084961, + "mean": -0.03204711154103279, + "std": 0.0378473699092865, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.23.4.ff.2.weight": { + "min": -0.7285163402557373, + "max": 0.6922004222869873, + "mean": 3.462535823928192e-05, + "std": 0.051778655499219894, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.23.4.ff.2.bias": { + "min": -0.1753203570842743, + "max": 0.21950407326221466, + "mean": 4.071232979185879e-05, + "std": 0.0318208709359169, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.0.weight": { + "min": -0.34123340249061584, + "max": 0.37526530027389526, + "mean": 4.290333163226023e-05, + "std": 0.0341440849006176, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.24.1.g": { + "min": 0.31759148836135864, + "max": 1.2954586744308472, + "mean": 0.6016563177108765, + "std": 0.08407581597566605, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_q.weight": { + "min": -0.2837989628314972, + "max": 0.2609255015850067, + "mean": -3.0735166092199506e-06, + "std": 0.035984087735414505, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_q.bias": { + "min": -0.23655052483081818, + "max": 0.2062867432832718, + "mean": 0.0002321804640814662, + "std": 0.05606939643621445, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_k.weight": { + "min": -0.436277836561203, + "max": 0.3261794447898865, + "mean": 2.4473378289258108e-05, + "std": 0.03413478285074234, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_k.bias": { + "min": -5.569121360778809, + "max": 7.344529628753662, + "mean": -0.007453735917806625, + "std": 0.7020133137702942, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_v.weight": { + "min": -0.3451450765132904, + "max": 0.36535224318504333, + "mean": 0.0001032469590427354, + "std": 0.047828368842601776, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_v.bias": { + "min": -0.07407404482364655, + "max": 0.06063373386859894, + "mean": 0.0009325749706476927, + "std": 0.014960682019591331, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.weight": { + "min": -0.25645625591278076, + "max": 0.28786128759384155, + "mean": 4.184576027910225e-06, + "std": 0.041555535048246384, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.bias": { + "min": -0.05557131767272949, + "max": 0.06310223042964935, + "mean": 0.00014075382205192, + "std": 0.0071859210729599, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.3.g": { + "min": 0.4938402473926544, + "max": 1.2290534973144531, + "mean": 1.0134642124176025, + "std": 0.1175011619925499, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.weight": { + "min": -1.0939291715621948, + "max": 1.0472568273544312, + "mean": -4.937269113725051e-05, + "std": 0.052410781383514404, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.bias": { + "min": -0.22465373575687408, + "max": 0.17359215021133423, + "mean": -0.027279244735836983, + "std": 0.0364469476044178, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.24.4.ff.2.weight": { + "min": -0.8881030678749084, + "max": 0.9261159300804138, + "mean": -0.00014599041605833918, + "std": 0.05328277125954628, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.24.4.ff.2.bias": { + "min": -0.17176949977874756, + "max": 0.3815639615058899, + "mean": 0.003376794047653675, + "std": 0.03997529670596123, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.0.weight": { + "min": -0.7789531350135803, + "max": 0.725176990032196, + "mean": 1.8912758605438285e-05, + "std": 0.04616439342498779, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.25.1.g": { + "min": 0.3386198878288269, + "max": 1.43718421459198, + "mean": 0.9484164714813232, + "std": 0.2068886160850525, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_q.weight": { + "min": -1.7457443475723267, + "max": 1.7046759128570557, + "mean": 0.00022706578602083027, + "std": 0.15868695080280304, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_q.bias": { + "min": -1.2048320770263672, + "max": 1.1044596433639526, + "mean": -0.009567854925990105, + "std": 0.20464132726192474, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_k.weight": { + "min": -0.4219454526901245, + "max": 0.42726483941078186, + "mean": 6.450812361435965e-05, + "std": 0.04801829159259796, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_k.bias": { + "min": -19.830074310302734, + "max": 19.624286651611328, + "mean": -0.24912264943122864, + "std": 4.795468807220459, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_v.weight": { + "min": -0.32499611377716064, + "max": 0.43987926840782166, + "mean": -1.1840356819448061e-05, + "std": 0.04616156592965126, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_v.bias": { + "min": -0.034201864153146744, + "max": 0.03727949783205986, + "mean": 0.0006420350982807577, + "std": 0.012923939153552055, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.weight": { + "min": -0.7049213647842407, + "max": 0.6658478379249573, + "mean": 4.366881330497563e-05, + "std": 0.057883720844984055, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.bias": { + "min": -0.07255180925130844, + "max": 0.06780894845724106, + "mean": -0.00013478109030984342, + "std": 0.012948636896908283, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.3.g": { + "min": 0.38018205761909485, + "max": 1.3912252187728882, + "mean": 1.0665678977966309, + "std": 0.21972529590129852, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.weight": { + "min": -0.6171136498451233, + "max": 0.7182933688163757, + "mean": 0.00011123980220872909, + "std": 0.05802140384912491, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.bias": { + "min": -0.22050145268440247, + "max": 0.2261514961719513, + "mean": 0.006267528980970383, + "std": 0.04982294142246246, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.25.4.ff.2.weight": { + "min": -0.6300009489059448, + "max": 0.8896978497505188, + "mean": 1.1602171070990153e-05, + "std": 0.023528659716248512, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.25.4.ff.2.bias": { + "min": -0.5090406537055969, + "max": 0.47603797912597656, + "mean": -0.003031304571777582, + "std": 0.0695611834526062, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.norm_out.g": { + "min": 0.5378094911575317, + "max": 1.184032917022705, + "mean": 0.7829163670539856, + "std": 0.09918713569641113, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.proj_out.weight": { + "min": -0.26840853691101074, + "max": 0.21375010907649994, + "mean": -0.00022396638814825565, + "std": 0.05399699881672859, + "sparsity": 0.0, + "shape": [ + 100, + 1024 + ] + }, + "transformer.proj_out.bias": { + "min": -0.23899979889392853, + "max": 0.014829290099442005, + "mean": -0.04399246349930763, + "std": 0.034442439675331116, + "sparsity": 0.0, + "shape": [ + 100 + ] + } + } +} \ No newline at end of file