diff --git "a/model_analysis.json" "b/model_analysis.json" new file mode 100644--- /dev/null +++ "b/model_analysis.json" @@ -0,0 +1,4683 @@ +{ + "layer_types": { + "transformer": 391 + }, + "parameter_counts": { + "transformer.time_embed.time_mlp.0.weight": 262144, + "transformer.time_embed.time_mlp.0.bias": 1024, + "transformer.time_embed.time_mlp.2.weight": 1048576, + "transformer.time_embed.time_mlp.2.bias": 1024, + "transformer.text_embed.text_embed.weight": 254600, + "transformer.input_embed.proj.weight": 307200, + "transformer.input_embed.proj.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, + "transformer.layers.0.1.g": 1024, + "transformer.layers.0.2.to_q.weight": 1048576, + "transformer.layers.0.2.to_q.bias": 1024, + "transformer.layers.0.2.to_k.weight": 1048576, + "transformer.layers.0.2.to_k.bias": 1024, + "transformer.layers.0.2.to_v.weight": 1048576, + "transformer.layers.0.2.to_v.bias": 1024, + "transformer.layers.0.2.to_out.0.weight": 1048576, + "transformer.layers.0.2.to_out.0.bias": 1024, + "transformer.layers.0.3.g": 1024, + "transformer.layers.0.4.ff.0.0.weight": 4194304, + "transformer.layers.0.4.ff.0.0.bias": 4096, + "transformer.layers.0.4.ff.2.weight": 4194304, + "transformer.layers.0.4.ff.2.bias": 1024, + "transformer.layers.1.1.g": 1024, + "transformer.layers.1.2.to_q.weight": 1048576, + "transformer.layers.1.2.to_q.bias": 1024, + "transformer.layers.1.2.to_k.weight": 1048576, + "transformer.layers.1.2.to_k.bias": 1024, + "transformer.layers.1.2.to_v.weight": 1048576, + "transformer.layers.1.2.to_v.bias": 1024, + "transformer.layers.1.2.to_out.0.weight": 1048576, + "transformer.layers.1.2.to_out.0.bias": 1024, + "transformer.layers.1.3.g": 1024, + "transformer.layers.1.4.ff.0.0.weight": 4194304, + "transformer.layers.1.4.ff.0.0.bias": 4096, + "transformer.layers.1.4.ff.2.weight": 4194304, + "transformer.layers.1.4.ff.2.bias": 1024, + "transformer.layers.2.1.g": 1024, + "transformer.layers.2.2.to_q.weight": 1048576, + "transformer.layers.2.2.to_q.bias": 1024, + "transformer.layers.2.2.to_k.weight": 1048576, + "transformer.layers.2.2.to_k.bias": 1024, + "transformer.layers.2.2.to_v.weight": 1048576, + "transformer.layers.2.2.to_v.bias": 1024, + "transformer.layers.2.2.to_out.0.weight": 1048576, + "transformer.layers.2.2.to_out.0.bias": 1024, + "transformer.layers.2.3.g": 1024, + "transformer.layers.2.4.ff.0.0.weight": 4194304, + "transformer.layers.2.4.ff.0.0.bias": 4096, + "transformer.layers.2.4.ff.2.weight": 4194304, + "transformer.layers.2.4.ff.2.bias": 1024, + "transformer.layers.3.1.g": 1024, + "transformer.layers.3.2.to_q.weight": 1048576, + "transformer.layers.3.2.to_q.bias": 1024, + "transformer.layers.3.2.to_k.weight": 1048576, + "transformer.layers.3.2.to_k.bias": 1024, + "transformer.layers.3.2.to_v.weight": 1048576, + "transformer.layers.3.2.to_v.bias": 1024, + "transformer.layers.3.2.to_out.0.weight": 1048576, + "transformer.layers.3.2.to_out.0.bias": 1024, + "transformer.layers.3.3.g": 1024, + "transformer.layers.3.4.ff.0.0.weight": 4194304, + "transformer.layers.3.4.ff.0.0.bias": 4096, + "transformer.layers.3.4.ff.2.weight": 4194304, + "transformer.layers.3.4.ff.2.bias": 1024, + "transformer.layers.4.1.g": 1024, + "transformer.layers.4.2.to_q.weight": 1048576, + "transformer.layers.4.2.to_q.bias": 1024, + "transformer.layers.4.2.to_k.weight": 1048576, + "transformer.layers.4.2.to_k.bias": 1024, + "transformer.layers.4.2.to_v.weight": 1048576, + "transformer.layers.4.2.to_v.bias": 1024, + "transformer.layers.4.2.to_out.0.weight": 1048576, + "transformer.layers.4.2.to_out.0.bias": 1024, + "transformer.layers.4.3.g": 1024, + "transformer.layers.4.4.ff.0.0.weight": 4194304, + "transformer.layers.4.4.ff.0.0.bias": 4096, + "transformer.layers.4.4.ff.2.weight": 4194304, + "transformer.layers.4.4.ff.2.bias": 1024, + "transformer.layers.5.1.g": 1024, + "transformer.layers.5.2.to_q.weight": 1048576, + "transformer.layers.5.2.to_q.bias": 1024, + "transformer.layers.5.2.to_k.weight": 1048576, + "transformer.layers.5.2.to_k.bias": 1024, + "transformer.layers.5.2.to_v.weight": 1048576, + "transformer.layers.5.2.to_v.bias": 1024, + "transformer.layers.5.2.to_out.0.weight": 1048576, + "transformer.layers.5.2.to_out.0.bias": 1024, + "transformer.layers.5.3.g": 1024, + "transformer.layers.5.4.ff.0.0.weight": 4194304, + "transformer.layers.5.4.ff.0.0.bias": 4096, + "transformer.layers.5.4.ff.2.weight": 4194304, + "transformer.layers.5.4.ff.2.bias": 1024, + "transformer.layers.6.1.g": 1024, + "transformer.layers.6.2.to_q.weight": 1048576, + "transformer.layers.6.2.to_q.bias": 1024, + "transformer.layers.6.2.to_k.weight": 1048576, + "transformer.layers.6.2.to_k.bias": 1024, + "transformer.layers.6.2.to_v.weight": 1048576, + "transformer.layers.6.2.to_v.bias": 1024, + "transformer.layers.6.2.to_out.0.weight": 1048576, + "transformer.layers.6.2.to_out.0.bias": 1024, + "transformer.layers.6.3.g": 1024, + "transformer.layers.6.4.ff.0.0.weight": 4194304, + "transformer.layers.6.4.ff.0.0.bias": 4096, + "transformer.layers.6.4.ff.2.weight": 4194304, + "transformer.layers.6.4.ff.2.bias": 1024, + "transformer.layers.7.1.g": 1024, + "transformer.layers.7.2.to_q.weight": 1048576, + "transformer.layers.7.2.to_q.bias": 1024, + "transformer.layers.7.2.to_k.weight": 1048576, + "transformer.layers.7.2.to_k.bias": 1024, + "transformer.layers.7.2.to_v.weight": 1048576, + "transformer.layers.7.2.to_v.bias": 1024, + "transformer.layers.7.2.to_out.0.weight": 1048576, + "transformer.layers.7.2.to_out.0.bias": 1024, + "transformer.layers.7.3.g": 1024, + "transformer.layers.7.4.ff.0.0.weight": 4194304, + "transformer.layers.7.4.ff.0.0.bias": 4096, + "transformer.layers.7.4.ff.2.weight": 4194304, + "transformer.layers.7.4.ff.2.bias": 1024, + "transformer.layers.8.1.g": 1024, + "transformer.layers.8.2.to_q.weight": 1048576, + "transformer.layers.8.2.to_q.bias": 1024, + "transformer.layers.8.2.to_k.weight": 1048576, + "transformer.layers.8.2.to_k.bias": 1024, + "transformer.layers.8.2.to_v.weight": 1048576, + "transformer.layers.8.2.to_v.bias": 1024, + "transformer.layers.8.2.to_out.0.weight": 1048576, + "transformer.layers.8.2.to_out.0.bias": 1024, + "transformer.layers.8.3.g": 1024, + "transformer.layers.8.4.ff.0.0.weight": 4194304, + "transformer.layers.8.4.ff.0.0.bias": 4096, + "transformer.layers.8.4.ff.2.weight": 4194304, + "transformer.layers.8.4.ff.2.bias": 1024, + "transformer.layers.9.1.g": 1024, + "transformer.layers.9.2.to_q.weight": 1048576, + "transformer.layers.9.2.to_q.bias": 1024, + "transformer.layers.9.2.to_k.weight": 1048576, + "transformer.layers.9.2.to_k.bias": 1024, + "transformer.layers.9.2.to_v.weight": 1048576, + "transformer.layers.9.2.to_v.bias": 1024, + "transformer.layers.9.2.to_out.0.weight": 1048576, + "transformer.layers.9.2.to_out.0.bias": 1024, + "transformer.layers.9.3.g": 1024, + "transformer.layers.9.4.ff.0.0.weight": 4194304, + "transformer.layers.9.4.ff.0.0.bias": 4096, + "transformer.layers.9.4.ff.2.weight": 4194304, + "transformer.layers.9.4.ff.2.bias": 1024, + "transformer.layers.10.1.g": 1024, + "transformer.layers.10.2.to_q.weight": 1048576, + "transformer.layers.10.2.to_q.bias": 1024, + "transformer.layers.10.2.to_k.weight": 1048576, + "transformer.layers.10.2.to_k.bias": 1024, + "transformer.layers.10.2.to_v.weight": 1048576, + "transformer.layers.10.2.to_v.bias": 1024, + "transformer.layers.10.2.to_out.0.weight": 1048576, + "transformer.layers.10.2.to_out.0.bias": 1024, + "transformer.layers.10.3.g": 1024, + "transformer.layers.10.4.ff.0.0.weight": 4194304, + "transformer.layers.10.4.ff.0.0.bias": 4096, + "transformer.layers.10.4.ff.2.weight": 4194304, + "transformer.layers.10.4.ff.2.bias": 1024, + "transformer.layers.11.1.g": 1024, + "transformer.layers.11.2.to_q.weight": 1048576, + "transformer.layers.11.2.to_q.bias": 1024, + "transformer.layers.11.2.to_k.weight": 1048576, + "transformer.layers.11.2.to_k.bias": 1024, + "transformer.layers.11.2.to_v.weight": 1048576, + "transformer.layers.11.2.to_v.bias": 1024, + "transformer.layers.11.2.to_out.0.weight": 1048576, + "transformer.layers.11.2.to_out.0.bias": 1024, + "transformer.layers.11.3.g": 1024, + "transformer.layers.11.4.ff.0.0.weight": 4194304, + "transformer.layers.11.4.ff.0.0.bias": 4096, + "transformer.layers.11.4.ff.2.weight": 4194304, + "transformer.layers.11.4.ff.2.bias": 1024, + "transformer.layers.12.1.g": 1024, + "transformer.layers.12.2.to_q.weight": 1048576, + "transformer.layers.12.2.to_q.bias": 1024, + "transformer.layers.12.2.to_k.weight": 1048576, + "transformer.layers.12.2.to_k.bias": 1024, + "transformer.layers.12.2.to_v.weight": 1048576, + "transformer.layers.12.2.to_v.bias": 1024, + "transformer.layers.12.2.to_out.0.weight": 1048576, + "transformer.layers.12.2.to_out.0.bias": 1024, + "transformer.layers.12.3.g": 1024, + "transformer.layers.12.4.ff.0.0.weight": 4194304, + "transformer.layers.12.4.ff.0.0.bias": 4096, + "transformer.layers.12.4.ff.2.weight": 4194304, + "transformer.layers.12.4.ff.2.bias": 1024, + "transformer.layers.13.0.weight": 2097152, + "transformer.layers.13.1.g": 1024, + "transformer.layers.13.2.to_q.weight": 1048576, + "transformer.layers.13.2.to_q.bias": 1024, + "transformer.layers.13.2.to_k.weight": 1048576, + "transformer.layers.13.2.to_k.bias": 1024, + "transformer.layers.13.2.to_v.weight": 1048576, + "transformer.layers.13.2.to_v.bias": 1024, + "transformer.layers.13.2.to_out.0.weight": 1048576, + "transformer.layers.13.2.to_out.0.bias": 1024, + "transformer.layers.13.3.g": 1024, + "transformer.layers.13.4.ff.0.0.weight": 4194304, + "transformer.layers.13.4.ff.0.0.bias": 4096, + "transformer.layers.13.4.ff.2.weight": 4194304, + "transformer.layers.13.4.ff.2.bias": 1024, + "transformer.layers.14.0.weight": 2097152, + "transformer.layers.14.1.g": 1024, + "transformer.layers.14.2.to_q.weight": 1048576, + "transformer.layers.14.2.to_q.bias": 1024, + "transformer.layers.14.2.to_k.weight": 1048576, + "transformer.layers.14.2.to_k.bias": 1024, + "transformer.layers.14.2.to_v.weight": 1048576, + "transformer.layers.14.2.to_v.bias": 1024, + "transformer.layers.14.2.to_out.0.weight": 1048576, + "transformer.layers.14.2.to_out.0.bias": 1024, + "transformer.layers.14.3.g": 1024, + "transformer.layers.14.4.ff.0.0.weight": 4194304, + "transformer.layers.14.4.ff.0.0.bias": 4096, + "transformer.layers.14.4.ff.2.weight": 4194304, + "transformer.layers.14.4.ff.2.bias": 1024, + "transformer.layers.15.0.weight": 2097152, + "transformer.layers.15.1.g": 1024, + "transformer.layers.15.2.to_q.weight": 1048576, + "transformer.layers.15.2.to_q.bias": 1024, + "transformer.layers.15.2.to_k.weight": 1048576, + "transformer.layers.15.2.to_k.bias": 1024, + "transformer.layers.15.2.to_v.weight": 1048576, + "transformer.layers.15.2.to_v.bias": 1024, + "transformer.layers.15.2.to_out.0.weight": 1048576, + "transformer.layers.15.2.to_out.0.bias": 1024, + "transformer.layers.15.3.g": 1024, + "transformer.layers.15.4.ff.0.0.weight": 4194304, + "transformer.layers.15.4.ff.0.0.bias": 4096, + "transformer.layers.15.4.ff.2.weight": 4194304, + "transformer.layers.15.4.ff.2.bias": 1024, + "transformer.layers.16.0.weight": 2097152, + "transformer.layers.16.1.g": 1024, + "transformer.layers.16.2.to_q.weight": 1048576, + "transformer.layers.16.2.to_q.bias": 1024, + "transformer.layers.16.2.to_k.weight": 1048576, + "transformer.layers.16.2.to_k.bias": 1024, + "transformer.layers.16.2.to_v.weight": 1048576, + "transformer.layers.16.2.to_v.bias": 1024, + "transformer.layers.16.2.to_out.0.weight": 1048576, + "transformer.layers.16.2.to_out.0.bias": 1024, + "transformer.layers.16.3.g": 1024, + "transformer.layers.16.4.ff.0.0.weight": 4194304, + "transformer.layers.16.4.ff.0.0.bias": 4096, + "transformer.layers.16.4.ff.2.weight": 4194304, + "transformer.layers.16.4.ff.2.bias": 1024, + "transformer.layers.17.0.weight": 2097152, + "transformer.layers.17.1.g": 1024, + "transformer.layers.17.2.to_q.weight": 1048576, + "transformer.layers.17.2.to_q.bias": 1024, + "transformer.layers.17.2.to_k.weight": 1048576, + "transformer.layers.17.2.to_k.bias": 1024, + "transformer.layers.17.2.to_v.weight": 1048576, + "transformer.layers.17.2.to_v.bias": 1024, + "transformer.layers.17.2.to_out.0.weight": 1048576, + "transformer.layers.17.2.to_out.0.bias": 1024, + "transformer.layers.17.3.g": 1024, + "transformer.layers.17.4.ff.0.0.weight": 4194304, + "transformer.layers.17.4.ff.0.0.bias": 4096, + "transformer.layers.17.4.ff.2.weight": 4194304, + "transformer.layers.17.4.ff.2.bias": 1024, + "transformer.layers.18.0.weight": 2097152, + "transformer.layers.18.1.g": 1024, + "transformer.layers.18.2.to_q.weight": 1048576, + "transformer.layers.18.2.to_q.bias": 1024, + "transformer.layers.18.2.to_k.weight": 1048576, + "transformer.layers.18.2.to_k.bias": 1024, + "transformer.layers.18.2.to_v.weight": 1048576, + "transformer.layers.18.2.to_v.bias": 1024, + "transformer.layers.18.2.to_out.0.weight": 1048576, + "transformer.layers.18.2.to_out.0.bias": 1024, + "transformer.layers.18.3.g": 1024, + "transformer.layers.18.4.ff.0.0.weight": 4194304, + "transformer.layers.18.4.ff.0.0.bias": 4096, + "transformer.layers.18.4.ff.2.weight": 4194304, + "transformer.layers.18.4.ff.2.bias": 1024, + "transformer.layers.19.0.weight": 2097152, + "transformer.layers.19.1.g": 1024, + "transformer.layers.19.2.to_q.weight": 1048576, + "transformer.layers.19.2.to_q.bias": 1024, + "transformer.layers.19.2.to_k.weight": 1048576, + "transformer.layers.19.2.to_k.bias": 1024, + "transformer.layers.19.2.to_v.weight": 1048576, + "transformer.layers.19.2.to_v.bias": 1024, + "transformer.layers.19.2.to_out.0.weight": 1048576, + "transformer.layers.19.2.to_out.0.bias": 1024, + "transformer.layers.19.3.g": 1024, + "transformer.layers.19.4.ff.0.0.weight": 4194304, + "transformer.layers.19.4.ff.0.0.bias": 4096, + "transformer.layers.19.4.ff.2.weight": 4194304, + "transformer.layers.19.4.ff.2.bias": 1024, + "transformer.layers.20.0.weight": 2097152, + "transformer.layers.20.1.g": 1024, + "transformer.layers.20.2.to_q.weight": 1048576, + "transformer.layers.20.2.to_q.bias": 1024, + "transformer.layers.20.2.to_k.weight": 1048576, + "transformer.layers.20.2.to_k.bias": 1024, + "transformer.layers.20.2.to_v.weight": 1048576, + "transformer.layers.20.2.to_v.bias": 1024, + "transformer.layers.20.2.to_out.0.weight": 1048576, + "transformer.layers.20.2.to_out.0.bias": 1024, + "transformer.layers.20.3.g": 1024, + "transformer.layers.20.4.ff.0.0.weight": 4194304, + "transformer.layers.20.4.ff.0.0.bias": 4096, + "transformer.layers.20.4.ff.2.weight": 4194304, + "transformer.layers.20.4.ff.2.bias": 1024, + "transformer.layers.21.0.weight": 2097152, + "transformer.layers.21.1.g": 1024, + "transformer.layers.21.2.to_q.weight": 1048576, + "transformer.layers.21.2.to_q.bias": 1024, + "transformer.layers.21.2.to_k.weight": 1048576, + "transformer.layers.21.2.to_k.bias": 1024, + "transformer.layers.21.2.to_v.weight": 1048576, + "transformer.layers.21.2.to_v.bias": 1024, + "transformer.layers.21.2.to_out.0.weight": 1048576, + "transformer.layers.21.2.to_out.0.bias": 1024, + "transformer.layers.21.3.g": 1024, + "transformer.layers.21.4.ff.0.0.weight": 4194304, + "transformer.layers.21.4.ff.0.0.bias": 4096, + "transformer.layers.21.4.ff.2.weight": 4194304, + "transformer.layers.21.4.ff.2.bias": 1024, + "transformer.layers.22.0.weight": 2097152, + "transformer.layers.22.1.g": 1024, + "transformer.layers.22.2.to_q.weight": 1048576, + "transformer.layers.22.2.to_q.bias": 1024, + "transformer.layers.22.2.to_k.weight": 1048576, + "transformer.layers.22.2.to_k.bias": 1024, + "transformer.layers.22.2.to_v.weight": 1048576, + "transformer.layers.22.2.to_v.bias": 1024, + "transformer.layers.22.2.to_out.0.weight": 1048576, + "transformer.layers.22.2.to_out.0.bias": 1024, + "transformer.layers.22.3.g": 1024, + "transformer.layers.22.4.ff.0.0.weight": 4194304, + "transformer.layers.22.4.ff.0.0.bias": 4096, + "transformer.layers.22.4.ff.2.weight": 4194304, + "transformer.layers.22.4.ff.2.bias": 1024, + "transformer.layers.23.0.weight": 2097152, + "transformer.layers.23.1.g": 1024, + "transformer.layers.23.2.to_q.weight": 1048576, + "transformer.layers.23.2.to_q.bias": 1024, + "transformer.layers.23.2.to_k.weight": 1048576, + "transformer.layers.23.2.to_k.bias": 1024, + "transformer.layers.23.2.to_v.weight": 1048576, + "transformer.layers.23.2.to_v.bias": 1024, + "transformer.layers.23.2.to_out.0.weight": 1048576, + "transformer.layers.23.2.to_out.0.bias": 1024, + "transformer.layers.23.3.g": 1024, + "transformer.layers.23.4.ff.0.0.weight": 4194304, + "transformer.layers.23.4.ff.0.0.bias": 4096, + "transformer.layers.23.4.ff.2.weight": 4194304, + "transformer.layers.23.4.ff.2.bias": 1024, + "transformer.layers.24.0.weight": 2097152, + "transformer.layers.24.1.g": 1024, + "transformer.layers.24.2.to_q.weight": 1048576, + "transformer.layers.24.2.to_q.bias": 1024, + "transformer.layers.24.2.to_k.weight": 1048576, + "transformer.layers.24.2.to_k.bias": 1024, + "transformer.layers.24.2.to_v.weight": 1048576, + "transformer.layers.24.2.to_v.bias": 1024, + "transformer.layers.24.2.to_out.0.weight": 1048576, + "transformer.layers.24.2.to_out.0.bias": 1024, + "transformer.layers.24.3.g": 1024, + "transformer.layers.24.4.ff.0.0.weight": 4194304, + "transformer.layers.24.4.ff.0.0.bias": 4096, + "transformer.layers.24.4.ff.2.weight": 4194304, + "transformer.layers.24.4.ff.2.bias": 1024, + "transformer.layers.25.0.weight": 2097152, + "transformer.layers.25.1.g": 1024, + "transformer.layers.25.2.to_q.weight": 1048576, + "transformer.layers.25.2.to_q.bias": 1024, + "transformer.layers.25.2.to_k.weight": 1048576, + "transformer.layers.25.2.to_k.bias": 1024, + "transformer.layers.25.2.to_v.weight": 1048576, + "transformer.layers.25.2.to_v.bias": 1024, + "transformer.layers.25.2.to_out.0.weight": 1048576, + "transformer.layers.25.2.to_out.0.bias": 1024, + "transformer.layers.25.3.g": 1024, + "transformer.layers.25.4.ff.0.0.weight": 4194304, + "transformer.layers.25.4.ff.0.0.bias": 4096, + "transformer.layers.25.4.ff.2.weight": 4194304, + "transformer.layers.25.4.ff.2.bias": 1024, + "transformer.norm_out.g": 1024, + "transformer.proj_out.weight": 102400, + "transformer.proj_out.bias": 100 + }, + "important_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ], + "bottleneck_layers": [], + "recommendations": { + "focus_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ] + }, + "total_parameters": 391, + "total_elements": 360755948, + "param_ranges": { + "transformer.time_embed.time_mlp.0.weight": { + "min": -0.429890900850296, + "max": 0.2975340783596039, + "mean": -0.002528043230995536, + "std": 0.042567234486341476, + "sparsity": 0.0, + "shape": [ + 1024, + 256 + ] + }, + "transformer.time_embed.time_mlp.0.bias": { + "min": -0.06285920739173889, + "max": 0.10713651776313782, + "mean": 0.0006724470877088606, + "std": 0.03401060774922371, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.weight": { + "min": -0.4127168655395508, + "max": 0.8372595310211182, + "mean": -0.0001970978337340057, + "std": 0.024115173146128654, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.bias": { + "min": -0.11470083892345428, + "max": 0.3203592598438263, + "mean": -0.0009399179834872484, + "std": 0.019510779529809952, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.text_embed.text_embed.weight": { + "min": -2.786435842514038, + "max": 2.8647964000701904, + "mean": -0.00036496162647381425, + "std": 0.6155204772949219, + "sparsity": 0.0, + "shape": [ + 2546, + 100 + ] + }, + "transformer.input_embed.proj.weight": { + "min": -0.2788304090499878, + "max": 0.38129961490631104, + "mean": 0.00042573572136461735, + "std": 0.042747072875499725, + "sparsity": 0.0, + "shape": [ + 1024, + 300 + ] + }, + "transformer.input_embed.proj.bias": { + "min": -0.22175073623657227, + "max": 0.208872988820076, + "mean": -0.0044786068610847, + "std": 0.040869712829589844, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { + "min": -0.4284111559391022, + "max": 0.47638577222824097, + "mean": 4.7679491217422765e-06, + "std": 0.024512330070137978, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { + "min": -0.32299283146858215, + "max": 0.15659146010875702, + "mean": -0.04666333645582199, + "std": 0.051485899835824966, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { + "min": -0.41033437848091125, + "max": 0.35466355085372925, + "mean": -0.00013342559395823628, + "std": 0.023606186732649803, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { + "min": -0.2283795177936554, + "max": 0.2609671354293823, + "mean": -0.029088540002703667, + "std": 0.04924432560801506, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.1.g": { + "min": 0.25455695390701294, + "max": 0.8167241811752319, + "mean": 0.5252928733825684, + "std": 0.08043710887432098, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_q.weight": { + "min": -0.29693663120269775, + "max": 0.26587796211242676, + "mean": -0.00042661806219257414, + "std": 0.03210223466157913, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_q.bias": { + "min": -0.09257981181144714, + "max": 0.12483392655849457, + "mean": 0.0006469582440331578, + "std": 0.02571757137775421, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_k.weight": { + "min": -0.29060953855514526, + "max": 0.281120628118515, + "mean": -7.341133459703997e-05, + "std": 0.030930932611227036, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_k.bias": { + "min": -5.8982954025268555, + "max": 5.813107013702393, + "mean": -0.009337348863482475, + "std": 1.2953522205352783, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_v.weight": { + "min": -0.42515280842781067, + "max": 0.3437501788139343, + "mean": 9.81355260591954e-05, + "std": 0.029954733327031136, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_v.bias": { + "min": -0.028982222080230713, + "max": 0.027547072619199753, + "mean": -0.0003299822274129838, + "std": 0.012570270337164402, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.weight": { + "min": -0.4541594088077545, + "max": 0.44774138927459717, + "mean": 2.4147137082763948e-05, + "std": 0.02385564148426056, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.bias": { + "min": -0.08854468911886215, + "max": 0.09074825048446655, + "mean": 0.0022885985672473907, + "std": 0.019506951794028282, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.3.g": { + "min": 0.2667747437953949, + "max": 1.0526666641235352, + "mean": 0.5310115814208984, + "std": 0.10401110351085663, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.weight": { + "min": -0.5744121670722961, + "max": 0.6080161333084106, + "mean": -0.00042898603715002537, + "std": 0.038603950291872025, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.bias": { + "min": -0.1828344166278839, + "max": 0.04558030515909195, + "mean": -0.02944895066320896, + "std": 0.04260854050517082, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.0.4.ff.2.weight": { + "min": -1.1668061017990112, + "max": 1.6334388256072998, + "mean": 0.0003250878071412444, + "std": 0.02769906260073185, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.0.4.ff.2.bias": { + "min": -0.1617957502603531, + "max": 0.20511887967586517, + "mean": -0.021121997386217117, + "std": 0.027915872633457184, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.1.g": { + "min": 0.22389063239097595, + "max": 0.8404398560523987, + "mean": 0.48753583431243896, + "std": 0.07487782090902328, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_q.weight": { + "min": -0.25540560483932495, + "max": 0.30576375126838684, + "mean": -5.286063242238015e-06, + "std": 0.0334775373339653, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_q.bias": { + "min": -0.09518040716648102, + "max": 0.11029241979122162, + "mean": 7.437964086420834e-05, + "std": 0.026927735656499863, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_k.weight": { + "min": -0.29654812812805176, + "max": 0.29580071568489075, + "mean": 5.465543654281646e-05, + "std": 0.03255033493041992, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_k.bias": { + "min": -5.152629852294922, + "max": 5.073052883148193, + "mean": -0.014528467319905758, + "std": 1.1556384563446045, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_v.weight": { + "min": -0.34482821822166443, + "max": 0.3431924283504486, + "mean": 7.847632514312863e-05, + "std": 0.030065450817346573, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_v.bias": { + "min": -0.0359608419239521, + "max": 0.03339020535349846, + "mean": -0.00013936487084720284, + "std": 0.013043079525232315, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.weight": { + "min": -0.31543099880218506, + "max": 0.37475085258483887, + "mean": -1.99221267394023e-05, + "std": 0.024063827469944954, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.bias": { + "min": -0.1053055077791214, + "max": 0.12205620855093002, + "mean": -0.0019772218074649572, + "std": 0.028851687908172607, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.3.g": { + "min": 0.31148025393486023, + "max": 1.1159186363220215, + "mean": 0.6660937070846558, + "std": 0.09731028974056244, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.weight": { + "min": -0.8725345730781555, + "max": 0.6275786757469177, + "mean": 0.0016754826065152884, + "std": 0.04743966832756996, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.bias": { + "min": -0.27123701572418213, + "max": 0.034363195300102234, + "mean": -0.04658954590559006, + "std": 0.040568556636571884, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.1.4.ff.2.weight": { + "min": -0.9233484268188477, + "max": 0.9644548296928406, + "mean": 0.001022880314849317, + "std": 0.040709808468818665, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.1.4.ff.2.bias": { + "min": -0.14417493343353271, + "max": 0.07486628741025925, + "mean": -0.00909160915762186, + "std": 0.025672299787402153, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.1.g": { + "min": 0.24042263627052307, + "max": 0.7109521627426147, + "mean": 0.4471237063407898, + "std": 0.05905117839574814, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_q.weight": { + "min": -0.2719106674194336, + "max": 0.29774755239486694, + "mean": 9.55516952672042e-06, + "std": 0.035470303148031235, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_q.bias": { + "min": -0.11921010911464691, + "max": 0.11835695803165436, + "mean": 0.0007637137896381319, + "std": 0.027623096480965614, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_k.weight": { + "min": -0.28068092465400696, + "max": 0.2797088027000427, + "mean": -7.736143015790731e-05, + "std": 0.03509894013404846, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_k.bias": { + "min": -2.503926992416382, + "max": 2.515892505645752, + "mean": 0.02668764814734459, + "std": 0.5862060785293579, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_v.weight": { + "min": -0.22096332907676697, + "max": 0.2714470624923706, + "mean": 3.3548758437973447e-06, + "std": 0.030734958127141, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_v.bias": { + "min": -0.0337090790271759, + "max": 0.03134975582361221, + "mean": 0.00010986338020302355, + "std": 0.012415189296007156, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.weight": { + "min": -0.2351670116186142, + "max": 0.23143303394317627, + "mean": 5.6707456678850576e-05, + "std": 0.025697972625494003, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.bias": { + "min": -0.13545046746730804, + "max": 0.12696555256843567, + "mean": -0.00549742579460144, + "std": 0.03995845839381218, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.3.g": { + "min": 0.35431793332099915, + "max": 1.168055772781372, + "mean": 0.7104406356811523, + "std": 0.10342107713222504, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.weight": { + "min": -0.6171623468399048, + "max": 0.5538070201873779, + "mean": 0.0011603726306930184, + "std": 0.04612257331609726, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.bias": { + "min": -0.187709778547287, + "max": 0.025375014171004295, + "mean": -0.03482068330049515, + "std": 0.028561368584632874, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.2.4.ff.2.weight": { + "min": -1.1314054727554321, + "max": 0.9714292287826538, + "mean": 0.0003602738433983177, + "std": 0.0423499159514904, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.2.4.ff.2.bias": { + "min": -0.5970888137817383, + "max": 0.06280609965324402, + "mean": -0.004877342376857996, + "std": 0.028585655614733696, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.1.g": { + "min": 0.37514442205429077, + "max": 0.9365863800048828, + "mean": 0.5923141837120056, + "std": 0.06635680049657822, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_q.weight": { + "min": -0.3909958004951477, + "max": 0.36877286434173584, + "mean": 7.174501661211252e-05, + "std": 0.037190962582826614, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_q.bias": { + "min": -0.11852732300758362, + "max": 0.13606122136116028, + "mean": 0.0009374335058964789, + "std": 0.02925141341984272, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_k.weight": { + "min": -0.6188546419143677, + "max": 0.508575975894928, + "mean": 1.5391087799798697e-05, + "std": 0.03644438832998276, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_k.bias": { + "min": -8.168816566467285, + "max": 8.769427299499512, + "mean": -0.10911353677511215, + "std": 1.696131944656372, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_v.weight": { + "min": -0.2764376997947693, + "max": 0.2397889643907547, + "mean": 5.34953796886839e-05, + "std": 0.03261784091591835, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_v.bias": { + "min": -0.05230281502008438, + "max": 0.03951656445860863, + "mean": 8.823134703561664e-05, + "std": 0.01295400783419609, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.weight": { + "min": -0.23082277178764343, + "max": 0.23429568111896515, + "mean": -2.1679703422705643e-05, + "std": 0.0293941181153059, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.bias": { + "min": -0.20415563881397247, + "max": 0.1055976152420044, + "mean": -0.004027670249342918, + "std": 0.03260914608836174, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.3.g": { + "min": 0.3398659825325012, + "max": 1.008574366569519, + "mean": 0.7007372975349426, + "std": 0.09649426490068436, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.weight": { + "min": -0.5645706057548523, + "max": 0.8320877552032471, + "mean": 0.00041511692688800395, + "std": 0.042306262999773026, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.bias": { + "min": -0.21099911630153656, + "max": 0.03097626566886902, + "mean": -0.032180383801460266, + "std": 0.026477735489606857, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.3.4.ff.2.weight": { + "min": -0.7537994980812073, + "max": 0.7179465293884277, + "mean": -7.129359801183455e-06, + "std": 0.03684566915035248, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.3.4.ff.2.bias": { + "min": -0.2629236578941345, + "max": 0.10548774898052216, + "mean": -0.00303501239977777, + "std": 0.028845027089118958, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.1.g": { + "min": 0.28467807173728943, + "max": 0.6921964883804321, + "mean": 0.49945610761642456, + "std": 0.04626332223415375, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_q.weight": { + "min": -0.279328316450119, + "max": 0.23436570167541504, + "mean": -0.00011136279499623924, + "std": 0.03876578062772751, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_q.bias": { + "min": -0.15460819005966187, + "max": 0.12665635347366333, + "mean": -0.002232019789516926, + "std": 0.03342032432556152, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_k.weight": { + "min": -0.41363096237182617, + "max": 0.6597210764884949, + "mean": -2.0344648874015547e-05, + "std": 0.03910161554813385, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_k.bias": { + "min": -4.231404781341553, + "max": 4.715085029602051, + "mean": -0.020485566928982735, + "std": 1.0069705247879028, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_v.weight": { + "min": -0.2449151873588562, + "max": 0.20747897028923035, + "mean": 4.346559217083268e-05, + "std": 0.033968474715948105, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_v.bias": { + "min": -0.03452696651220322, + "max": 0.04465686157345772, + "mean": -1.5960962628014386e-05, + "std": 0.012621430680155754, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.weight": { + "min": -0.20041236281394958, + "max": 0.20551952719688416, + "mean": -2.960992424050346e-05, + "std": 0.031025830656290054, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.bias": { + "min": -0.19978956878185272, + "max": 0.11348189413547516, + "mean": -0.002926791785284877, + "std": 0.034484151750802994, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.3.g": { + "min": 0.36731821298599243, + "max": 1.0521864891052246, + "mean": 0.6705360412597656, + "std": 0.06614020466804504, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.weight": { + "min": -0.39791443943977356, + "max": 0.5023131966590881, + "mean": -3.831370850093663e-05, + "std": 0.04114069044589996, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.bias": { + "min": -0.1279803365468979, + "max": 0.026696184650063515, + "mean": -0.030547261238098145, + "std": 0.021858656778931618, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.4.4.ff.2.weight": { + "min": -0.44846877455711365, + "max": 0.43229183554649353, + "mean": 8.759970660321414e-05, + "std": 0.034898921847343445, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.4.4.ff.2.bias": { + "min": -0.2670278549194336, + "max": 0.07220447063446045, + "mean": -0.0011172632221132517, + "std": 0.023101668804883957, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.1.g": { + "min": 0.2872157692909241, + "max": 0.6838868260383606, + "mean": 0.5244971513748169, + "std": 0.047394201159477234, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_q.weight": { + "min": -0.22190631926059723, + "max": 0.22351428866386414, + "mean": 1.5601781342411414e-05, + "std": 0.038955170661211014, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_q.bias": { + "min": -0.13637839257717133, + "max": 0.10904650390148163, + "mean": 0.0002307215763721615, + "std": 0.02925163321197033, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_k.weight": { + "min": -0.37520402669906616, + "max": 0.4367537200450897, + "mean": -9.730283636599779e-06, + "std": 0.03929009288549423, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_k.bias": { + "min": -3.8370232582092285, + "max": 4.988061904907227, + "mean": 0.0097434613853693, + "std": 0.8443066477775574, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_v.weight": { + "min": -0.22342386841773987, + "max": 0.21985094249248505, + "mean": -9.139148460235447e-08, + "std": 0.034415289759635925, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_v.bias": { + "min": -0.04353320971131325, + "max": 0.03576282411813736, + "mean": -0.0002566012553870678, + "std": 0.012079274281859398, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.weight": { + "min": -0.2132977545261383, + "max": 0.18884801864624023, + "mean": -1.671975405770354e-05, + "std": 0.031542494893074036, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.bias": { + "min": -0.1805061399936676, + "max": 0.12078476697206497, + "mean": -0.0024164910428225994, + "std": 0.041246652603149414, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.3.g": { + "min": 0.42202678322792053, + "max": 0.9410442113876343, + "mean": 0.6627340912818909, + "std": 0.056649643927812576, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.weight": { + "min": -0.3713216483592987, + "max": 0.47501668334007263, + "mean": -8.242137118941173e-05, + "std": 0.04089945927262306, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.bias": { + "min": -0.20792357623577118, + "max": 0.027002831920981407, + "mean": -0.03024197369813919, + "std": 0.02132386527955532, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.5.4.ff.2.weight": { + "min": -0.33984270691871643, + "max": 0.7327128648757935, + "mean": 8.53092860779725e-05, + "std": 0.03477407246828079, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.5.4.ff.2.bias": { + "min": -0.23982134461402893, + "max": 0.050322338938713074, + "mean": -0.0011965972371399403, + "std": 0.020453661680221558, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.1.g": { + "min": 0.3062271773815155, + "max": 0.6509252786636353, + "mean": 0.5250095725059509, + "std": 0.04592073708772659, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_q.weight": { + "min": -0.30402758717536926, + "max": 0.21729634702205658, + "mean": 7.005365478107706e-05, + "std": 0.03949893265962601, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_q.bias": { + "min": -0.14918896555900574, + "max": 0.13127601146697998, + "mean": 0.00036064194864593446, + "std": 0.030438335612416267, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_k.weight": { + "min": -0.25730884075164795, + "max": 0.20225763320922852, + "mean": 3.0886923923389986e-05, + "std": 0.03948678448796272, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_k.bias": { + "min": -2.334343671798706, + "max": 2.3739240169525146, + "mean": -0.02623903937637806, + "std": 0.4496191143989563, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_v.weight": { + "min": -0.1891229748725891, + "max": 0.21049852669239044, + "mean": 3.720186577993445e-05, + "std": 0.03480042889714241, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_v.bias": { + "min": -0.03178652375936508, + "max": 0.03553091734647751, + "mean": -0.0002019420498982072, + "std": 0.012286705896258354, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.weight": { + "min": -0.18846523761749268, + "max": 0.1703805774450302, + "mean": -6.774859502911568e-05, + "std": 0.032177072018384933, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.bias": { + "min": -0.13940556347370148, + "max": 0.13744769990444183, + "mean": -0.0025155385956168175, + "std": 0.051295846700668335, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.3.g": { + "min": 0.4672105014324188, + "max": 0.9528681039810181, + "mean": 0.6688433885574341, + "std": 0.05244635045528412, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.weight": { + "min": -0.3241286277770996, + "max": 0.3096275329589844, + "mean": -1.696625076874625e-06, + "std": 0.04095519334077835, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.bias": { + "min": -0.1246853619813919, + "max": 0.025154586881399155, + "mean": -0.03071470744907856, + "std": 0.019795699045062065, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.6.4.ff.2.weight": { + "min": -0.43982067704200745, + "max": 0.44470375776290894, + "mean": 9.459229477215558e-05, + "std": 0.03512655198574066, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.6.4.ff.2.bias": { + "min": -0.22400110960006714, + "max": 0.05141644552350044, + "mean": -0.0011801186483353376, + "std": 0.018454499542713165, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.1.g": { + "min": 0.33955061435699463, + "max": 0.7357662320137024, + "mean": 0.55861496925354, + "std": 0.04118064045906067, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_q.weight": { + "min": -0.2722431421279907, + "max": 0.27798357605934143, + "mean": 1.9865790818585083e-05, + "std": 0.04106421023607254, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_q.bias": { + "min": -0.1370246559381485, + "max": 0.1397887021303177, + "mean": 0.0004894830053672194, + "std": 0.026618896052241325, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_k.weight": { + "min": -0.4905315637588501, + "max": 0.3558432161808014, + "mean": 8.873307524481788e-05, + "std": 0.04070229455828667, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_k.bias": { + "min": -2.291904926300049, + "max": 1.7411547899246216, + "mean": -0.02105572447180748, + "std": 0.4997440576553345, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_v.weight": { + "min": -0.2170916199684143, + "max": 0.19797761738300323, + "mean": -4.09621607104782e-05, + "std": 0.034239448606967926, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_v.bias": { + "min": -0.0413656160235405, + "max": 0.038547735661268234, + "mean": -0.00015065219486132264, + "std": 0.012881237082183361, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.weight": { + "min": -0.17731794714927673, + "max": 0.18395833671092987, + "mean": 4.7481313231401145e-05, + "std": 0.03156236186623573, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.bias": { + "min": -0.17941592633724213, + "max": 0.18339262902736664, + "mean": -0.0022199342492967844, + "std": 0.05482170730829239, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.3.g": { + "min": 0.4743531346321106, + "max": 1.0208531618118286, + "mean": 0.6452549695968628, + "std": 0.04991196468472481, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.weight": { + "min": -0.2717994153499603, + "max": 0.3095380365848541, + "mean": 0.00011231788084842265, + "std": 0.04069165140390396, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.bias": { + "min": -0.10581093281507492, + "max": 0.02687394618988037, + "mean": -0.029505720362067223, + "std": 0.01791212521493435, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.7.4.ff.2.weight": { + "min": -0.3386741280555725, + "max": 0.3290008306503296, + "mean": 5.870793393114582e-05, + "std": 0.03442065790295601, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.7.4.ff.2.bias": { + "min": -0.18140022456645966, + "max": 0.041891518980264664, + "mean": -0.0010755020193755627, + "std": 0.017211386933922768, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.1.g": { + "min": 0.32555529475212097, + "max": 0.6836872696876526, + "mean": 0.5111882090568542, + "std": 0.03670286759734154, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_q.weight": { + "min": -0.2333182841539383, + "max": 0.22538095712661743, + "mean": -3.595184534788132e-05, + "std": 0.03918481990695, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_q.bias": { + "min": -0.11544553935527802, + "max": 0.13142207264900208, + "mean": 0.00015133176930248737, + "std": 0.029199015349149704, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_k.weight": { + "min": -0.3520807921886444, + "max": 0.2848276197910309, + "mean": 7.631589141965378e-06, + "std": 0.03925250843167305, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_k.bias": { + "min": -4.123228073120117, + "max": 3.5356757640838623, + "mean": -0.011553899385035038, + "std": 0.6816845536231995, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_v.weight": { + "min": -0.2112175077199936, + "max": 0.20856595039367676, + "mean": 3.472584648989141e-05, + "std": 0.03449223190546036, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_v.bias": { + "min": -0.03566575422883034, + "max": 0.0481027290225029, + "mean": 0.0007965473923832178, + "std": 0.01284803170710802, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.weight": { + "min": -0.21010246872901917, + "max": 0.19273991882801056, + "mean": -1.5139250990614528e-06, + "std": 0.031702835112810135, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.bias": { + "min": -0.1862909346818924, + "max": 0.17676132917404175, + "mean": -0.0028484249487519264, + "std": 0.0586179718375206, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.3.g": { + "min": 0.47472548484802246, + "max": 1.0383955240249634, + "mean": 0.6513745784759521, + "std": 0.049231819808483124, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.weight": { + "min": -0.24837423861026764, + "max": 0.3289947211742401, + "mean": 0.00018063507741317153, + "std": 0.04057996720075607, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.bias": { + "min": -0.1235797256231308, + "max": 0.024505803361535072, + "mean": -0.0304916650056839, + "std": 0.01757434755563736, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.8.4.ff.2.weight": { + "min": -0.4211723804473877, + "max": 0.48196032643318176, + "mean": 1.983910806302447e-06, + "std": 0.03540581464767456, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.8.4.ff.2.bias": { + "min": -0.1518622189760208, + "max": 0.04325510933995247, + "mean": 3.965849464293569e-05, + "std": 0.014866944402456284, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.1.g": { + "min": 0.31559497117996216, + "max": 0.6791313290596008, + "mean": 0.552861213684082, + "std": 0.040544018149375916, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_q.weight": { + "min": -0.20591191947460175, + "max": 0.21929602324962616, + "mean": 3.05178873531986e-05, + "std": 0.03830549493432045, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_q.bias": { + "min": -0.13762998580932617, + "max": 0.11262793093919754, + "mean": 2.1001505956519395e-05, + "std": 0.02581183984875679, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_k.weight": { + "min": -0.4020220637321472, + "max": 0.3705553412437439, + "mean": 2.6537300072959624e-05, + "std": 0.03818797320127487, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_k.bias": { + "min": -3.767557382583618, + "max": 2.8661978244781494, + "mean": 0.00114790303632617, + "std": 0.5165696144104004, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_v.weight": { + "min": -0.2021435797214508, + "max": 0.19701559841632843, + "mean": 2.942326318589039e-05, + "std": 0.03430229425430298, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_v.bias": { + "min": -0.051028795540332794, + "max": 0.03999846801161766, + "mean": -0.0004189596220385283, + "std": 0.01342750433832407, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.weight": { + "min": -0.19608062505722046, + "max": 0.20127296447753906, + "mean": -1.228029668709496e-05, + "std": 0.0318099670112133, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.bias": { + "min": -0.19270533323287964, + "max": 0.1945824921131134, + "mean": -0.0029681914020329714, + "std": 0.06255524605512619, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.3.g": { + "min": 0.34965983033180237, + "max": 1.0794146060943604, + "mean": 0.6671044826507568, + "std": 0.054688673466444016, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.weight": { + "min": -0.22492384910583496, + "max": 0.2511879801750183, + "mean": 0.0003592889988794923, + "std": 0.04076888784766197, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.bias": { + "min": -0.0908823236823082, + "max": 0.04379650950431824, + "mean": -0.030081426724791527, + "std": 0.01758776418864727, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.9.4.ff.2.weight": { + "min": -0.35308927297592163, + "max": 0.3038119673728943, + "mean": -4.2369181755930185e-05, + "std": 0.03713066130876541, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.9.4.ff.2.bias": { + "min": -0.16173776984214783, + "max": 0.06332767009735107, + "mean": -8.476080256514251e-05, + "std": 0.019383691251277924, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.1.g": { + "min": 0.34886276721954346, + "max": 0.7204337120056152, + "mean": 0.5423545241355896, + "std": 0.03890771418809891, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_q.weight": { + "min": -0.2189498394727707, + "max": 0.22237031161785126, + "mean": -1.0949186616926454e-05, + "std": 0.03923875838518143, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_q.bias": { + "min": -0.11818630248308182, + "max": 0.1705242395401001, + "mean": 0.0002858135849237442, + "std": 0.025103183463215828, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_k.weight": { + "min": -0.24609290063381195, + "max": 0.30029821395874023, + "mean": -3.647123230621219e-05, + "std": 0.03893830627202988, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_k.bias": { + "min": -3.5019514560699463, + "max": 3.711169481277466, + "mean": 0.015843264758586884, + "std": 0.7819090485572815, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_v.weight": { + "min": -0.21829943358898163, + "max": 0.23758333921432495, + "mean": -1.3816705177305266e-05, + "std": 0.03631007671356201, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_v.bias": { + "min": -0.04714132845401764, + "max": 0.051366791129112244, + "mean": 0.00047747697681188583, + "std": 0.01350868958979845, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.weight": { + "min": -0.21323293447494507, + "max": 0.2170214205980301, + "mean": 5.658239751937799e-05, + "std": 0.033622127026319504, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.bias": { + "min": -0.21135154366493225, + "max": 0.23155677318572998, + "mean": -0.005110344383865595, + "std": 0.06187622249126434, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.3.g": { + "min": 0.36206167936325073, + "max": 1.097632884979248, + "mean": 0.6992448568344116, + "std": 0.05318887159228325, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.weight": { + "min": -0.23417295515537262, + "max": 0.2448265254497528, + "mean": 0.0004635582445189357, + "std": 0.04127749800682068, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.bias": { + "min": -0.09782810509204865, + "max": 0.06829667091369629, + "mean": -0.031430259346961975, + "std": 0.018095970153808594, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.10.4.ff.2.weight": { + "min": -0.30144715309143066, + "max": 0.3511406481266022, + "mean": -8.084578439593315e-05, + "std": 0.04028310999274254, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.10.4.ff.2.bias": { + "min": -0.15208296477794647, + "max": 0.1494162231683731, + "mean": 0.0002504626754671335, + "std": 0.023021113127470016, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.1.g": { + "min": 0.9953764081001282, + "max": 1.0005042552947998, + "mean": 0.9992995858192444, + "std": 0.00161725003272295, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_q.weight": { + "min": -0.031269513070583344, + "max": 0.031265489757061005, + "mean": -1.9295868696644902e-05, + "std": 0.018045131117105484, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_q.bias": { + "min": -0.031223546713590622, + "max": 0.0309983491897583, + "mean": -0.0010843857889994979, + "std": 0.017954815179109573, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_k.weight": { + "min": -0.03126491606235504, + "max": 0.03126438334584236, + "mean": 3.5442317312117666e-06, + "std": 0.018045514822006226, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_k.bias": { + "min": -0.031160738319158554, + "max": 0.03118434175848961, + "mean": 0.00033380728564225137, + "std": 0.01806693710386753, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.bias": { + "min": -0.0004188704479020089, + "max": 0.00032652742811478674, + "mean": -3.7413692552945577e-06, + "std": 9.604167280485854e-05, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.3.g": { + "min": 0.9950032234191895, + "max": 1.000982403755188, + "mean": 0.9997574090957642, + "std": 0.0010362789034843445, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.weight": { + "min": -0.03225700929760933, + "max": 0.032385751605033875, + "mean": -9.290525667893235e-06, + "std": 0.01804504171013832, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.bias": { + "min": -0.03201417997479439, + "max": 0.03202167525887489, + "mean": 0.0002501691924408078, + "std": 0.018027769401669502, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.11.4.ff.2.weight": { + "min": -0.0008222123724408448, + "max": 0.0007597835501655936, + "mean": -1.4037771052244352e-06, + "std": 0.0001422762288711965, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.11.4.ff.2.bias": { + "min": -0.0004344022599980235, + "max": 0.000338842801284045, + "mean": -5.246626642474439e-06, + "std": 8.8350752776023e-05, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.1.g": { + "min": 0.3827516734600067, + "max": 0.7182729244232178, + "mean": 0.5806694030761719, + "std": 0.03871554881334305, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_q.weight": { + "min": -0.23742133378982544, + "max": 0.19636878371238708, + "mean": 2.6759680622490123e-05, + "std": 0.037471406161785126, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_q.bias": { + "min": -0.1184450015425682, + "max": 0.16545724868774414, + "mean": 0.0009931407403200865, + "std": 0.027538597583770752, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_k.weight": { + "min": -0.2451958954334259, + "max": 0.49966853857040405, + "mean": -5.0392896810080856e-05, + "std": 0.0376293808221817, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_k.bias": { + "min": -3.932778835296631, + "max": 3.76035213470459, + "mean": -0.003568061627447605, + "std": 0.6805727481842041, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_v.weight": { + "min": -0.22708982229232788, + "max": 0.2511258125305176, + "mean": -1.143130793934688e-05, + "std": 0.037441134452819824, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_v.bias": { + "min": -0.07165413349866867, + "max": 0.08049532026052475, + "mean": -0.0005234142299741507, + "std": 0.015659447759389877, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.weight": { + "min": -0.22785918414592743, + "max": 0.25734248757362366, + "mean": -2.8539496270241216e-05, + "std": 0.035427965223789215, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.bias": { + "min": -0.19991812109947205, + "max": 0.214930921792984, + "mean": -0.005538000259548426, + "std": 0.06830835342407227, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.3.g": { + "min": 0.40544652938842773, + "max": 1.1868609189987183, + "mean": 0.7379507422447205, + "std": 0.05492096021771431, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.weight": { + "min": -0.22111627459526062, + "max": 0.2460324913263321, + "mean": 0.0005210894159972668, + "std": 0.04134552925825119, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.bias": { + "min": -0.10342609882354736, + "max": 0.024193264544010162, + "mean": -0.03266071155667305, + "std": 0.018867699429392815, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.12.4.ff.2.weight": { + "min": -0.448818176984787, + "max": 0.4217819571495056, + "mean": -0.000431257882155478, + "std": 0.04690708965063095, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.12.4.ff.2.bias": { + "min": -0.2508312165737152, + "max": 0.46896737813949585, + "mean": 0.00319076469168067, + "std": 0.04450752213597298, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.0.weight": { + "min": -0.3169752359390259, + "max": 0.33314692974090576, + "mean": -2.5337005354231223e-05, + "std": 0.021293330937623978, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.13.1.g": { + "min": 0.32465165853500366, + "max": 0.6822460889816284, + "mean": 0.5709546208381653, + "std": 0.04454142227768898, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_q.weight": { + "min": -0.16416817903518677, + "max": 0.1733636111021042, + "mean": -4.858425018028356e-05, + "std": 0.03318599984049797, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_q.bias": { + "min": -0.18635453283786774, + "max": 0.1423773616552353, + "mean": 4.034899757243693e-05, + "std": 0.02966292016208172, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_k.weight": { + "min": -0.37941935658454895, + "max": 0.24537599086761475, + "mean": -1.0037202628154773e-05, + "std": 0.03276722505688667, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_k.bias": { + "min": -3.6522655487060547, + "max": 3.2869510650634766, + "mean": -0.014257419854402542, + "std": 0.9848745465278625, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_v.weight": { + "min": -0.23496949672698975, + "max": 0.24738511443138123, + "mean": -1.7606289475224912e-05, + "std": 0.04170484468340874, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_v.bias": { + "min": -0.07273464649915695, + "max": 0.15422259271144867, + "mean": 0.0006638166960328817, + "std": 0.025166962295770645, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.weight": { + "min": -0.2664797306060791, + "max": 0.248508021235466, + "mean": -1.5497178537771106e-05, + "std": 0.04014508053660393, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.bias": { + "min": -0.18958289921283722, + "max": 0.19478872418403625, + "mean": -0.0012272386811673641, + "std": 0.06668190658092499, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.3.g": { + "min": 0.32911282777786255, + "max": 0.9983987808227539, + "mean": 0.7191941142082214, + "std": 0.0522039495408535, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.weight": { + "min": -0.23135632276535034, + "max": 0.24583274126052856, + "mean": 0.00018275347247254103, + "std": 0.04090878367424011, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.bias": { + "min": -0.11377062648534775, + "max": 0.018522411584854126, + "mean": -0.04246858134865761, + "std": 0.018818210810422897, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.13.4.ff.2.weight": { + "min": -0.3897111713886261, + "max": 0.40687721967697144, + "mean": -2.178383874706924e-05, + "std": 0.04854356870055199, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.13.4.ff.2.bias": { + "min": -0.6922244429588318, + "max": 0.4119531214237213, + "mean": 0.0008513483917340636, + "std": 0.060246195644140244, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.0.weight": { + "min": -0.0007574164774268866, + "max": 1.0006382465362549, + "mean": 0.0004883571527898312, + "std": 0.022093627601861954, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.14.1.g": { + "min": 0.995638906955719, + "max": 1.000357985496521, + "mean": 0.9993537068367004, + "std": 0.001561639248393476, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_q.weight": { + "min": -0.03126733377575874, + "max": 0.031276635825634, + "mean": -2.102728103636764e-05, + "std": 0.01803644187748432, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_q.bias": { + "min": -0.03121519461274147, + "max": 0.031229794025421143, + "mean": -0.000677098985761404, + "std": 0.017830997705459595, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_k.weight": { + "min": -0.03127024322748184, + "max": 0.03126488998532295, + "mean": -8.836910637910478e-06, + "std": 0.018035493791103363, + "sparsity": 9.5367431640625e-07, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_k.bias": { + "min": -0.031232407316565514, + "max": 0.031246833503246307, + "mean": -0.0007298535201698542, + "std": 0.0179455429315567, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.bias": { + "min": -0.00021961150923743844, + "max": 0.00025036477018147707, + "mean": -8.001849209904321e-07, + "std": 8.148775668814778e-05, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.3.g": { + "min": 0.995234489440918, + "max": 1.0012273788452148, + "mean": 0.9999035596847534, + "std": 0.001056881621479988, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.weight": { + "min": -0.03210779279470444, + "max": 0.03193911164999008, + "mean": 5.988833436276764e-06, + "std": 0.018047882243990898, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.bias": { + "min": -0.031279150396585464, + "max": 0.031749434769153595, + "mean": 0.00044275011168792844, + "std": 0.018095213919878006, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.14.4.ff.2.weight": { + "min": -0.0007249970221891999, + "max": 0.0007807987276464701, + "mean": -3.5197314218748943e-07, + "std": 0.00014107293100096285, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.14.4.ff.2.bias": { + "min": -0.00022946292301639915, + "max": 0.00021843933791387826, + "mean": -1.2389690482450533e-06, + "std": 7.586943684145808e-05, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.0.weight": { + "min": -0.23457658290863037, + "max": 0.2724316418170929, + "mean": 7.120183454389917e-06, + "std": 0.01881435327231884, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.15.1.g": { + "min": 0.32128995656967163, + "max": 0.692602813243866, + "mean": 0.5816522836685181, + "std": 0.04586285352706909, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_q.weight": { + "min": -0.18137724697589874, + "max": 0.19706015288829803, + "mean": -1.1772945072152652e-05, + "std": 0.03318871185183525, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_q.bias": { + "min": -0.1606057584285736, + "max": 0.12942680716514587, + "mean": -0.0010653780773282051, + "std": 0.03413666784763336, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_k.weight": { + "min": -0.3314096927642822, + "max": 0.3108590841293335, + "mean": -1.029382929118583e-05, + "std": 0.03223954886198044, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_k.bias": { + "min": -7.800930500030518, + "max": 8.760626792907715, + "mean": 0.09345310181379318, + "std": 1.6193360090255737, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_v.weight": { + "min": -0.23322908580303192, + "max": 0.24158968031406403, + "mean": 4.1257830162066966e-05, + "std": 0.040864504873752594, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_v.bias": { + "min": -0.07589098066091537, + "max": 0.06572694331407547, + "mean": 0.00047726332559250295, + "std": 0.019406452775001526, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.weight": { + "min": -0.24502328038215637, + "max": 0.23352351784706116, + "mean": -2.668632077984512e-06, + "std": 0.039439182728528976, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.bias": { + "min": -0.16295023262500763, + "max": 0.16059955954551697, + "mean": 0.0016356806736439466, + "std": 0.06525918841362, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.3.g": { + "min": 0.556554913520813, + "max": 0.9408271312713623, + "mean": 0.7128406167030334, + "std": 0.039769869297742844, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.weight": { + "min": -0.22860386967658997, + "max": 0.25511136651039124, + "mean": -4.539915607892908e-05, + "std": 0.04058451950550079, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.bias": { + "min": -0.13515348732471466, + "max": 0.02234305441379547, + "mean": -0.04134881868958473, + "std": 0.01836741715669632, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.15.4.ff.2.weight": { + "min": -0.4212746024131775, + "max": 0.39222264289855957, + "mean": -4.234017978888005e-06, + "std": 0.047794174402952194, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.15.4.ff.2.bias": { + "min": -0.6065890789031982, + "max": 0.6503084897994995, + "mean": 0.0015799436951056123, + "std": 0.056790802627801895, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.0.weight": { + "min": -0.25144556164741516, + "max": 0.3204054832458496, + "mean": -5.961472197668627e-06, + "std": 0.019617972895503044, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.16.1.g": { + "min": 0.36011484265327454, + "max": 0.6801881790161133, + "mean": 0.5707067251205444, + "std": 0.04279083386063576, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_q.weight": { + "min": -0.22022095322608948, + "max": 0.17668727040290833, + "mean": -3.4830391086870804e-05, + "std": 0.034304577857255936, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_q.bias": { + "min": -0.16363094747066498, + "max": 0.2328542321920395, + "mean": 0.0003622955409809947, + "std": 0.03286634013056755, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_k.weight": { + "min": -0.26301464438438416, + "max": 0.23922747373580933, + "mean": -5.2115137805230916e-05, + "std": 0.03390384837985039, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_k.bias": { + "min": -4.843376159667969, + "max": 5.079013824462891, + "mean": 0.043839357793331146, + "std": 1.2277964353561401, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_v.weight": { + "min": -0.24616090953350067, + "max": 0.24996501207351685, + "mean": 7.23035482224077e-05, + "std": 0.04399650916457176, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_v.bias": { + "min": -0.06268942356109619, + "max": 0.054509397596120834, + "mean": 0.0006487497594207525, + "std": 0.017188087105751038, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.weight": { + "min": -0.2859387695789337, + "max": 0.27142879366874695, + "mean": -4.999006341677159e-05, + "std": 0.04299502447247505, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.bias": { + "min": -0.16028116643428802, + "max": 0.1701924204826355, + "mean": -0.00288166431710124, + "std": 0.05925562232732773, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.3.g": { + "min": 0.5195892453193665, + "max": 0.9285021424293518, + "mean": 0.71345454454422, + "std": 0.03798013553023338, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.weight": { + "min": -0.23824341595172882, + "max": 0.24957609176635742, + "mean": 0.0004649516486097127, + "std": 0.040465425699949265, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.bias": { + "min": -0.1440071016550064, + "max": 0.041583579033613205, + "mean": -0.03968297317624092, + "std": 0.020529083907604218, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.16.4.ff.2.weight": { + "min": -0.5325517058372498, + "max": 0.5824555158615112, + "mean": 5.4546726460102946e-06, + "std": 0.04887215048074722, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.16.4.ff.2.bias": { + "min": -0.5182770490646362, + "max": 0.4927639365196228, + "mean": 0.002359384670853615, + "std": 0.05340024083852768, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.0.weight": { + "min": -0.27337488532066345, + "max": 0.3148258626461029, + "mean": 1.8105949948221678e-06, + "std": 0.020055659115314484, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.17.1.g": { + "min": 0.36668556928634644, + "max": 0.7091761827468872, + "mean": 0.5931493639945984, + "std": 0.04574775695800781, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_q.weight": { + "min": -0.2106715887784958, + "max": 0.1992705911397934, + "mean": 3.0829094612272456e-05, + "std": 0.03486945852637291, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_q.bias": { + "min": -0.18688145279884338, + "max": 0.2038576900959015, + "mean": 0.0009574516443535686, + "std": 0.03150374814867973, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_k.weight": { + "min": -0.2888670563697815, + "max": 0.33895108103752136, + "mean": -4.766129131894559e-05, + "std": 0.03459092602133751, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_k.bias": { + "min": -3.8705790042877197, + "max": 3.3815643787384033, + "mean": 0.014464044943451881, + "std": 0.8578398823738098, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_v.weight": { + "min": -0.2241480052471161, + "max": 0.24975183606147766, + "mean": -4.014226306026103e-06, + "std": 0.04223877936601639, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_v.bias": { + "min": -0.0549103245139122, + "max": 0.04695763811469078, + "mean": -1.4065793948248029e-05, + "std": 0.015847966074943542, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.weight": { + "min": -0.2923896610736847, + "max": 0.2908935844898224, + "mean": -7.1035901783034205e-06, + "std": 0.04195380210876465, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.bias": { + "min": -0.12478198111057281, + "max": 0.2591152787208557, + "mean": -0.003229282796382904, + "std": 0.053138162940740585, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.3.g": { + "min": 0.45623326301574707, + "max": 0.8426384925842285, + "mean": 0.7055743336677551, + "std": 0.034994304180145264, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.weight": { + "min": -0.5110324621200562, + "max": 0.3488520383834839, + "mean": 0.00034251363831572235, + "std": 0.04021010175347328, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.bias": { + "min": -0.18705947697162628, + "max": 0.03953401744365692, + "mean": -0.03937750309705734, + "std": 0.02131262607872486, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.17.4.ff.2.weight": { + "min": -0.5440298318862915, + "max": 0.5563207864761353, + "mean": -7.213428762042895e-05, + "std": 0.050746381282806396, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.17.4.ff.2.bias": { + "min": -0.5106754302978516, + "max": 0.662798285484314, + "mean": 0.002447732724249363, + "std": 0.04947002977132797, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.0.weight": { + "min": -0.33220773935317993, + "max": 0.2652227580547333, + "mean": 3.882123110088287e-06, + "std": 0.01939382590353489, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.18.1.g": { + "min": 0.32238951325416565, + "max": 0.764789879322052, + "mean": 0.6509858965873718, + "std": 0.0451430045068264, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_q.weight": { + "min": -0.24893951416015625, + "max": 0.219136044383049, + "mean": -2.739794126682682e-06, + "std": 0.036503732204437256, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_q.bias": { + "min": -0.32658451795578003, + "max": 0.28703945875167847, + "mean": -0.0006784016732126474, + "std": 0.038509681820869446, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_k.weight": { + "min": -0.3096793591976166, + "max": 0.3693031072616577, + "mean": 6.47535634925589e-05, + "std": 0.036244187504053116, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_k.bias": { + "min": -4.706123352050781, + "max": 5.793623447418213, + "mean": 0.03790595382452011, + "std": 1.4113690853118896, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_v.weight": { + "min": -0.22146277129650116, + "max": 0.20545163750648499, + "mean": -7.498646300518885e-05, + "std": 0.042494479566812515, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_v.bias": { + "min": -0.07756227254867554, + "max": 0.05129515379667282, + "mean": -0.0009279022924602032, + "std": 0.016406826674938202, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.weight": { + "min": -0.33102676272392273, + "max": 0.3289909064769745, + "mean": -5.028288796893321e-06, + "std": 0.042801517993211746, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.bias": { + "min": -0.28435027599334717, + "max": 0.111260324716568, + "mean": -0.001205979730002582, + "std": 0.04699746519327164, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.3.g": { + "min": 0.4868572950363159, + "max": 0.8827712535858154, + "mean": 0.7374467849731445, + "std": 0.03787440061569214, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.weight": { + "min": -0.3608104884624481, + "max": 0.2736315429210663, + "mean": 5.1337454351596534e-05, + "std": 0.04065750911831856, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.bias": { + "min": -0.24695155024528503, + "max": 0.04662873595952988, + "mean": -0.039258524775505066, + "std": 0.023203320801258087, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.18.4.ff.2.weight": { + "min": -0.6257067322731018, + "max": 0.5967472195625305, + "mean": -6.336745718726888e-05, + "std": 0.05312981456518173, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.18.4.ff.2.bias": { + "min": -0.7091463208198547, + "max": 0.26562684774398804, + "mean": 0.0009212760487571359, + "std": 0.051211755722761154, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.0.weight": { + "min": -0.34325337409973145, + "max": 0.30324116349220276, + "mean": 1.430171323590912e-07, + "std": 0.019143851473927498, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.19.1.g": { + "min": 0.34994906187057495, + "max": 0.7801994681358337, + "mean": 0.6388012170791626, + "std": 0.04902452602982521, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_q.weight": { + "min": -0.20566730201244354, + "max": 0.2065981775522232, + "mean": -6.0025900893379e-05, + "std": 0.03770073875784874, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_q.bias": { + "min": -0.25845062732696533, + "max": 0.268261194229126, + "mean": -0.00040606403490528464, + "std": 0.04461587592959404, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_k.weight": { + "min": -0.3532998263835907, + "max": 0.3217300474643707, + "mean": -7.498586455767509e-06, + "std": 0.037208717316389084, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_k.bias": { + "min": -5.249058246612549, + "max": 4.194725036621094, + "mean": -0.02638459950685501, + "std": 1.005539894104004, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_v.weight": { + "min": -0.2386980652809143, + "max": 0.24372872710227966, + "mean": -2.586210030131042e-05, + "std": 0.04321879521012306, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_v.bias": { + "min": -0.062367696315050125, + "max": 0.05657341331243515, + "mean": 0.0003560591721907258, + "std": 0.01414806954562664, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.weight": { + "min": -0.43753641843795776, + "max": 0.37365373969078064, + "mean": 1.460490602767095e-05, + "std": 0.044131483882665634, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.bias": { + "min": -0.09578664600849152, + "max": 0.17602641880512238, + "mean": -0.0006584142101928592, + "std": 0.0351262167096138, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.3.g": { + "min": 0.42189696431159973, + "max": 1.0643466711044312, + "mean": 0.7485300302505493, + "std": 0.04179271310567856, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.weight": { + "min": -0.265593945980072, + "max": 0.29676973819732666, + "mean": -7.866104715503752e-05, + "std": 0.04081883281469345, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.bias": { + "min": -0.18380795419216156, + "max": 0.04289933666586876, + "mean": -0.036790553480386734, + "std": 0.02553965151309967, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.19.4.ff.2.weight": { + "min": -0.4579704999923706, + "max": 0.4863548278808594, + "mean": 4.272036676411517e-05, + "std": 0.05422580987215042, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.19.4.ff.2.bias": { + "min": -0.2855266034603119, + "max": 0.5506117939949036, + "mean": -0.0008784987148828804, + "std": 0.047787394374608994, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.0.weight": { + "min": -0.2924049496650696, + "max": 0.32256847620010376, + "mean": 5.68283303437056e-06, + "std": 0.01997658796608448, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.20.1.g": { + "min": 0.29146960377693176, + "max": 0.7568098902702332, + "mean": 0.6507450938224792, + "std": 0.05195383355021477, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_q.weight": { + "min": -0.2434154599905014, + "max": 0.26121068000793457, + "mean": -5.642844371323008e-06, + "std": 0.039615679532289505, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_q.bias": { + "min": -0.2669755518436432, + "max": 0.19996695220470428, + "mean": -0.0008783398079685867, + "std": 0.051739659160375595, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_k.weight": { + "min": -0.27164191007614136, + "max": 0.25313133001327515, + "mean": 5.889336534892209e-06, + "std": 0.03871198371052742, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_k.bias": { + "min": -12.952698707580566, + "max": 15.9312744140625, + "mean": 0.03322799503803253, + "std": 1.9877989292144775, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_v.weight": { + "min": -0.20647653937339783, + "max": 0.2256641685962677, + "mean": -7.246333552757278e-05, + "std": 0.040561433881521225, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_v.bias": { + "min": -0.06935624778270721, + "max": 0.06306472420692444, + "mean": 0.00016317634435836226, + "std": 0.014748629182577133, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.weight": { + "min": -0.4654642939567566, + "max": 0.31973931193351746, + "mean": 1.960094778041821e-05, + "std": 0.04059756174683571, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.bias": { + "min": -0.06414826959371567, + "max": 0.11558651179075241, + "mean": 0.0012002706062048674, + "std": 0.024707410484552383, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.3.g": { + "min": 0.3750652074813843, + "max": 0.9275709390640259, + "mean": 0.7511184215545654, + "std": 0.03999503329396248, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.weight": { + "min": -0.2787969410419464, + "max": 0.2728310525417328, + "mean": -0.00016816731658764184, + "std": 0.0410102978348732, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.bias": { + "min": -0.19773395359516144, + "max": 0.05162842571735382, + "mean": -0.03201429173350334, + "std": 0.025033777579665184, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.20.4.ff.2.weight": { + "min": -0.6583139300346375, + "max": 0.5351659655570984, + "mean": -5.119909474160522e-05, + "std": 0.05286192148923874, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.20.4.ff.2.bias": { + "min": -0.1919519156217575, + "max": 0.5808603763580322, + "mean": -0.0005111135542392731, + "std": 0.04104519635438919, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.0.weight": { + "min": -0.4174348711967468, + "max": 0.3718706965446472, + "mean": 6.703614417347126e-06, + "std": 0.021633952856063843, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.21.1.g": { + "min": 0.21479681134223938, + "max": 0.7478918433189392, + "mean": 0.6493618488311768, + "std": 0.054201409220695496, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_q.weight": { + "min": -0.20870910584926605, + "max": 0.1947445124387741, + "mean": 4.020327469334006e-05, + "std": 0.03945876285433769, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_q.bias": { + "min": -0.32888734340667725, + "max": 0.25908946990966797, + "mean": -0.003229741007089615, + "std": 0.05623537674546242, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_k.weight": { + "min": -0.2056186944246292, + "max": 0.2540878653526306, + "mean": 5.3863834182266146e-05, + "std": 0.03856115788221359, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_k.bias": { + "min": -6.228662967681885, + "max": 6.915782928466797, + "mean": 0.04823269695043564, + "std": 1.3832472562789917, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_v.weight": { + "min": -0.20932167768478394, + "max": 0.22993139922618866, + "mean": -4.4988796616962645e-06, + "std": 0.04132062569260597, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_v.bias": { + "min": -0.04368359223008156, + "max": 0.035936541855335236, + "mean": -1.0926916729658842e-05, + "std": 0.012798542156815529, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.weight": { + "min": -0.3968988060951233, + "max": 0.34478238224983215, + "mean": -5.5305037676589563e-05, + "std": 0.04239818826317787, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.bias": { + "min": -0.05508316308259964, + "max": 0.06261169910430908, + "mean": 0.0003532343253027648, + "std": 0.018669025972485542, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.3.g": { + "min": 0.3511422276496887, + "max": 1.0404622554779053, + "mean": 0.7897100448608398, + "std": 0.048514608293771744, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.weight": { + "min": -0.3338225483894348, + "max": 0.38620951771736145, + "mean": -0.00016899823094718158, + "std": 0.04149709641933441, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.bias": { + "min": -0.15740133821964264, + "max": 0.058948904275894165, + "mean": -0.0318116769194603, + "std": 0.025069545954465866, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.21.4.ff.2.weight": { + "min": -0.6960089206695557, + "max": 0.46894899010658264, + "mean": -8.237230940721929e-05, + "std": 0.05181308463215828, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.21.4.ff.2.bias": { + "min": -0.24741840362548828, + "max": 0.3286932408809662, + "mean": -0.00026996995438821614, + "std": 0.04144337400794029, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.0.weight": { + "min": -0.28653645515441895, + "max": 0.35008078813552856, + "mean": -2.9175917006796226e-06, + "std": 0.024247299879789352, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.22.1.g": { + "min": 0.19693201780319214, + "max": 0.7785046696662903, + "mean": 0.670115053653717, + "std": 0.058539655059576035, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_q.weight": { + "min": -0.228579580783844, + "max": 0.23089821636676788, + "mean": -2.1206951714702882e-05, + "std": 0.040444690734148026, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_q.bias": { + "min": -0.22008375823497772, + "max": 0.24102427065372467, + "mean": 0.0007767346687614918, + "std": 0.055866289883852005, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_k.weight": { + "min": -0.21646404266357422, + "max": 0.2256259322166443, + "mean": -7.261607970576733e-05, + "std": 0.03937656059861183, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_k.bias": { + "min": -8.884381294250488, + "max": 9.046843528747559, + "mean": -0.0012065814808011055, + "std": 1.8454406261444092, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_v.weight": { + "min": -0.2685357332229614, + "max": 0.2581280469894409, + "mean": 4.3568383262027055e-05, + "std": 0.03841337561607361, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_v.bias": { + "min": -0.057995330542325974, + "max": 0.05802358686923981, + "mean": 0.00035532776382751763, + "std": 0.014707793481647968, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.weight": { + "min": -0.2625483274459839, + "max": 0.2874881625175476, + "mean": -6.166227103676647e-05, + "std": 0.039080966264009476, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.bias": { + "min": -0.0441780760884285, + "max": 0.03726305067539215, + "mean": -0.00010403832129668444, + "std": 0.013333701528608799, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.3.g": { + "min": 0.3392186760902405, + "max": 1.088745355606079, + "mean": 0.8640130758285522, + "std": 0.06376548111438751, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.weight": { + "min": -0.42300641536712646, + "max": 0.41883379220962524, + "mean": 0.00031391510856337845, + "std": 0.04352227598428726, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.bias": { + "min": -0.21468287706375122, + "max": 0.1707322746515274, + "mean": -0.02942698448896408, + "std": 0.03183940798044205, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.22.4.ff.2.weight": { + "min": -0.5976030826568604, + "max": 0.559415340423584, + "mean": -0.00014561890566255897, + "std": 0.05347010865807533, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.22.4.ff.2.bias": { + "min": -0.17889779806137085, + "max": 0.3772771656513214, + "mean": 0.001343069365248084, + "std": 0.03730209544301033, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.0.weight": { + "min": -0.39426180720329285, + "max": 0.36868590116500854, + "mean": 3.8257519918261096e-05, + "std": 0.0286222156137228, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.23.1.g": { + "min": 0.2908227741718292, + "max": 0.8264791369438171, + "mean": 0.7054398655891418, + "std": 0.0677274614572525, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_q.weight": { + "min": -0.926691472530365, + "max": 1.0270028114318848, + "mean": -2.8848577130702324e-05, + "std": 0.04765753820538521, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_q.bias": { + "min": -0.878186821937561, + "max": 0.8147233724594116, + "mean": -0.0002844139817170799, + "std": 0.09543365985155106, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_k.weight": { + "min": -0.27030670642852783, + "max": 0.24055372178554535, + "mean": -2.2271982743404806e-05, + "std": 0.038951653987169266, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_k.bias": { + "min": -23.68506622314453, + "max": 22.795772552490234, + "mean": -0.09177836775779724, + "std": 4.062017440795898, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_v.weight": { + "min": -0.22721800208091736, + "max": 0.24524104595184326, + "mean": -2.5419916710234247e-05, + "std": 0.038644734770059586, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_v.bias": { + "min": -0.059977784752845764, + "max": 0.04509967938065529, + "mean": -0.00013076608593109995, + "std": 0.01468411460518837, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.weight": { + "min": -0.3371436893939972, + "max": 0.3742288053035736, + "mean": 7.546843335148878e-06, + "std": 0.04082665964961052, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.bias": { + "min": -0.04609467089176178, + "max": 0.19514600932598114, + "mean": 0.00027449309709481895, + "std": 0.013541752472519875, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.3.g": { + "min": 0.37357744574546814, + "max": 1.125421166419983, + "mean": 0.8902103900909424, + "std": 0.06386467814445496, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.weight": { + "min": -0.447258859872818, + "max": 0.5423630475997925, + "mean": 2.548232805565931e-05, + "std": 0.045591775327920914, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.bias": { + "min": -0.22343683242797852, + "max": 0.08690512925386429, + "mean": -0.03200257197022438, + "std": 0.03771420195698738, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.23.4.ff.2.weight": { + "min": -0.7260164022445679, + "max": 0.6879873275756836, + "mean": 3.631926665548235e-05, + "std": 0.05180613696575165, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.23.4.ff.2.bias": { + "min": -0.17385190725326538, + "max": 0.21751302480697632, + "mean": 3.567736712284386e-05, + "std": 0.03174319490790367, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.0.weight": { + "min": -0.3385016918182373, + "max": 0.37161216139793396, + "mean": 4.3165768147446215e-05, + "std": 0.0341353677213192, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.24.1.g": { + "min": 0.31760096549987793, + "max": 1.2830872535705566, + "mean": 0.6014329195022583, + "std": 0.08317635953426361, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_q.weight": { + "min": -0.28283271193504333, + "max": 0.26012101769447327, + "mean": -2.921331542893313e-06, + "std": 0.035985857248306274, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_q.bias": { + "min": -0.23526135087013245, + "max": 0.20543411374092102, + "mean": 0.00024757458595559, + "std": 0.05601666867733002, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_k.weight": { + "min": -0.4347652792930603, + "max": 0.32389530539512634, + "mean": 2.395988121861592e-05, + "std": 0.03412287309765816, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_k.bias": { + "min": -5.541207790374756, + "max": 7.30653190612793, + "mean": -0.00736255943775177, + "std": 0.6987443566322327, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_v.weight": { + "min": -0.3433501720428467, + "max": 0.361217200756073, + "mean": 0.0001032147411024198, + "std": 0.04784071072936058, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_v.bias": { + "min": -0.07378581166267395, + "max": 0.060352873057127, + "mean": 0.0009383288561366498, + "std": 0.01492984127253294, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.weight": { + "min": -0.2561882436275482, + "max": 0.28616371750831604, + "mean": 5.244153726380318e-06, + "std": 0.04157177358865738, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.bias": { + "min": -0.05515698716044426, + "max": 0.062612384557724, + "mean": 0.00012199293996673077, + "std": 0.007132581900805235, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.3.g": { + "min": 0.49441853165626526, + "max": 1.2188090085983276, + "mean": 1.013464331626892, + "std": 0.11732637882232666, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.weight": { + "min": -1.0939745903015137, + "max": 1.0476189851760864, + "mean": -4.830169564229436e-05, + "std": 0.05242462456226349, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.bias": { + "min": -0.22291964292526245, + "max": 0.17299318313598633, + "mean": -0.027209078893065453, + "std": 0.03627277910709381, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.24.4.ff.2.weight": { + "min": -0.8831630349159241, + "max": 0.9219300150871277, + "mean": -0.00014596671098843217, + "std": 0.05330995097756386, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.24.4.ff.2.bias": { + "min": -0.17071670293807983, + "max": 0.3785896301269531, + "mean": 0.0033629729878157377, + "std": 0.03981942683458328, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.0.weight": { + "min": -0.7773581147193909, + "max": 0.721552848815918, + "mean": 1.7906297216541134e-05, + "std": 0.0461493544280529, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.25.1.g": { + "min": 0.33866649866104126, + "max": 1.4223623275756836, + "mean": 0.9482957124710083, + "std": 0.20650897920131683, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_q.weight": { + "min": -1.7458724975585938, + "max": 1.7043527364730835, + "mean": 0.0002272979763802141, + "std": 0.1587107926607132, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_q.bias": { + "min": -1.1964622735977173, + "max": 1.0986626148223877, + "mean": -0.009530629962682724, + "std": 0.20347940921783447, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_k.weight": { + "min": -0.420305460691452, + "max": 0.42840367555618286, + "mean": 6.361818668665364e-05, + "std": 0.04802125319838524, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_k.bias": { + "min": -19.700023651123047, + "max": 19.49565315246582, + "mean": -0.24793246388435364, + "std": 4.7666015625, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_v.weight": { + "min": -0.3232991695404053, + "max": 0.4378996789455414, + "mean": -1.1727358469215687e-05, + "std": 0.04616958647966385, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_v.bias": { + "min": -0.033631421625614166, + "max": 0.03664267063140869, + "mean": 0.0006392866489477456, + "std": 0.012905232608318329, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.weight": { + "min": -0.7025362849235535, + "max": 0.6701837778091431, + "mean": 4.212657222524285e-05, + "std": 0.057898350059986115, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.bias": { + "min": -0.07234562933444977, + "max": 0.06781232357025146, + "mean": -0.00013423134805634618, + "std": 0.012877929955720901, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.3.g": { + "min": 0.3804936408996582, + "max": 1.3917937278747559, + "mean": 1.0666232109069824, + "std": 0.21957866847515106, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.weight": { + "min": -0.6164886951446533, + "max": 0.7186930179595947, + "mean": 0.00011397639173083007, + "std": 0.05803186818957329, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.bias": { + "min": -0.21819192171096802, + "max": 0.22446297109127045, + "mean": 0.006146667059510946, + "std": 0.04965293034911156, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.25.4.ff.2.weight": { + "min": -0.6298643350601196, + "max": 0.8897628784179688, + "mean": 1.269071981369052e-05, + "std": 0.023556767031550407, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.25.4.ff.2.bias": { + "min": -0.50624680519104, + "max": 0.4730708599090576, + "mean": -0.0030176215805113316, + "std": 0.06914978474378586, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.norm_out.g": { + "min": 0.5384271144866943, + "max": 1.1763767004013062, + "mean": 0.7825473546981812, + "std": 0.09825034439563751, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.proj_out.weight": { + "min": -0.26688942313194275, + "max": 0.21287617087364197, + "mean": -0.00022272299975156784, + "std": 0.0540103055536747, + "sparsity": 0.0, + "shape": [ + 100, + 1024 + ] + }, + "transformer.proj_out.bias": { + "min": -0.23796546459197998, + "max": 0.014876163564622402, + "mean": -0.04389083757996559, + "std": 0.03420323133468628, + "sparsity": 0.0, + "shape": [ + 100 + ] + } + } +} \ No newline at end of file