diff --git "a/model_analysis.json" "b/model_analysis.json" new file mode 100644--- /dev/null +++ "b/model_analysis.json" @@ -0,0 +1,4683 @@ +{ + "layer_types": { + "transformer": 391 + }, + "parameter_counts": { + "transformer.time_embed.time_mlp.0.weight": 262144, + "transformer.time_embed.time_mlp.0.bias": 1024, + "transformer.time_embed.time_mlp.2.weight": 1048576, + "transformer.time_embed.time_mlp.2.bias": 1024, + "transformer.text_embed.text_embed.weight": 254600, + "transformer.input_embed.proj.weight": 307200, + "transformer.input_embed.proj.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, + "transformer.layers.0.1.g": 1024, + "transformer.layers.0.2.to_q.weight": 1048576, + "transformer.layers.0.2.to_q.bias": 1024, + "transformer.layers.0.2.to_k.weight": 1048576, + "transformer.layers.0.2.to_k.bias": 1024, + "transformer.layers.0.2.to_v.weight": 1048576, + "transformer.layers.0.2.to_v.bias": 1024, + "transformer.layers.0.2.to_out.0.weight": 1048576, + "transformer.layers.0.2.to_out.0.bias": 1024, + "transformer.layers.0.3.g": 1024, + "transformer.layers.0.4.ff.0.0.weight": 4194304, + "transformer.layers.0.4.ff.0.0.bias": 4096, + "transformer.layers.0.4.ff.2.weight": 4194304, + "transformer.layers.0.4.ff.2.bias": 1024, + "transformer.layers.1.1.g": 1024, + "transformer.layers.1.2.to_q.weight": 1048576, + "transformer.layers.1.2.to_q.bias": 1024, + "transformer.layers.1.2.to_k.weight": 1048576, + "transformer.layers.1.2.to_k.bias": 1024, + "transformer.layers.1.2.to_v.weight": 1048576, + "transformer.layers.1.2.to_v.bias": 1024, + "transformer.layers.1.2.to_out.0.weight": 1048576, + "transformer.layers.1.2.to_out.0.bias": 1024, + "transformer.layers.1.3.g": 1024, + "transformer.layers.1.4.ff.0.0.weight": 4194304, + "transformer.layers.1.4.ff.0.0.bias": 4096, + "transformer.layers.1.4.ff.2.weight": 4194304, + "transformer.layers.1.4.ff.2.bias": 1024, + "transformer.layers.2.1.g": 1024, + "transformer.layers.2.2.to_q.weight": 1048576, + "transformer.layers.2.2.to_q.bias": 1024, + "transformer.layers.2.2.to_k.weight": 1048576, + "transformer.layers.2.2.to_k.bias": 1024, + "transformer.layers.2.2.to_v.weight": 1048576, + "transformer.layers.2.2.to_v.bias": 1024, + "transformer.layers.2.2.to_out.0.weight": 1048576, + "transformer.layers.2.2.to_out.0.bias": 1024, + "transformer.layers.2.3.g": 1024, + "transformer.layers.2.4.ff.0.0.weight": 4194304, + "transformer.layers.2.4.ff.0.0.bias": 4096, + "transformer.layers.2.4.ff.2.weight": 4194304, + "transformer.layers.2.4.ff.2.bias": 1024, + "transformer.layers.3.1.g": 1024, + "transformer.layers.3.2.to_q.weight": 1048576, + "transformer.layers.3.2.to_q.bias": 1024, + "transformer.layers.3.2.to_k.weight": 1048576, + "transformer.layers.3.2.to_k.bias": 1024, + "transformer.layers.3.2.to_v.weight": 1048576, + "transformer.layers.3.2.to_v.bias": 1024, + "transformer.layers.3.2.to_out.0.weight": 1048576, + "transformer.layers.3.2.to_out.0.bias": 1024, + "transformer.layers.3.3.g": 1024, + "transformer.layers.3.4.ff.0.0.weight": 4194304, + "transformer.layers.3.4.ff.0.0.bias": 4096, + "transformer.layers.3.4.ff.2.weight": 4194304, + "transformer.layers.3.4.ff.2.bias": 1024, + "transformer.layers.4.1.g": 1024, + "transformer.layers.4.2.to_q.weight": 1048576, + "transformer.layers.4.2.to_q.bias": 1024, + "transformer.layers.4.2.to_k.weight": 1048576, + "transformer.layers.4.2.to_k.bias": 1024, + "transformer.layers.4.2.to_v.weight": 1048576, + "transformer.layers.4.2.to_v.bias": 1024, + "transformer.layers.4.2.to_out.0.weight": 1048576, + "transformer.layers.4.2.to_out.0.bias": 1024, + "transformer.layers.4.3.g": 1024, + "transformer.layers.4.4.ff.0.0.weight": 4194304, + "transformer.layers.4.4.ff.0.0.bias": 4096, + "transformer.layers.4.4.ff.2.weight": 4194304, + "transformer.layers.4.4.ff.2.bias": 1024, + "transformer.layers.5.1.g": 1024, + "transformer.layers.5.2.to_q.weight": 1048576, + "transformer.layers.5.2.to_q.bias": 1024, + "transformer.layers.5.2.to_k.weight": 1048576, + "transformer.layers.5.2.to_k.bias": 1024, + "transformer.layers.5.2.to_v.weight": 1048576, + "transformer.layers.5.2.to_v.bias": 1024, + "transformer.layers.5.2.to_out.0.weight": 1048576, + "transformer.layers.5.2.to_out.0.bias": 1024, + "transformer.layers.5.3.g": 1024, + "transformer.layers.5.4.ff.0.0.weight": 4194304, + "transformer.layers.5.4.ff.0.0.bias": 4096, + "transformer.layers.5.4.ff.2.weight": 4194304, + "transformer.layers.5.4.ff.2.bias": 1024, + "transformer.layers.6.1.g": 1024, + "transformer.layers.6.2.to_q.weight": 1048576, + "transformer.layers.6.2.to_q.bias": 1024, + "transformer.layers.6.2.to_k.weight": 1048576, + "transformer.layers.6.2.to_k.bias": 1024, + "transformer.layers.6.2.to_v.weight": 1048576, + "transformer.layers.6.2.to_v.bias": 1024, + "transformer.layers.6.2.to_out.0.weight": 1048576, + "transformer.layers.6.2.to_out.0.bias": 1024, + "transformer.layers.6.3.g": 1024, + "transformer.layers.6.4.ff.0.0.weight": 4194304, + "transformer.layers.6.4.ff.0.0.bias": 4096, + "transformer.layers.6.4.ff.2.weight": 4194304, + "transformer.layers.6.4.ff.2.bias": 1024, + "transformer.layers.7.1.g": 1024, + "transformer.layers.7.2.to_q.weight": 1048576, + "transformer.layers.7.2.to_q.bias": 1024, + "transformer.layers.7.2.to_k.weight": 1048576, + "transformer.layers.7.2.to_k.bias": 1024, + "transformer.layers.7.2.to_v.weight": 1048576, + "transformer.layers.7.2.to_v.bias": 1024, + "transformer.layers.7.2.to_out.0.weight": 1048576, + "transformer.layers.7.2.to_out.0.bias": 1024, + "transformer.layers.7.3.g": 1024, + "transformer.layers.7.4.ff.0.0.weight": 4194304, + "transformer.layers.7.4.ff.0.0.bias": 4096, + "transformer.layers.7.4.ff.2.weight": 4194304, + "transformer.layers.7.4.ff.2.bias": 1024, + "transformer.layers.8.1.g": 1024, + "transformer.layers.8.2.to_q.weight": 1048576, + "transformer.layers.8.2.to_q.bias": 1024, + "transformer.layers.8.2.to_k.weight": 1048576, + "transformer.layers.8.2.to_k.bias": 1024, + "transformer.layers.8.2.to_v.weight": 1048576, + "transformer.layers.8.2.to_v.bias": 1024, + "transformer.layers.8.2.to_out.0.weight": 1048576, + "transformer.layers.8.2.to_out.0.bias": 1024, + "transformer.layers.8.3.g": 1024, + "transformer.layers.8.4.ff.0.0.weight": 4194304, + "transformer.layers.8.4.ff.0.0.bias": 4096, + "transformer.layers.8.4.ff.2.weight": 4194304, + "transformer.layers.8.4.ff.2.bias": 1024, + "transformer.layers.9.1.g": 1024, + "transformer.layers.9.2.to_q.weight": 1048576, + "transformer.layers.9.2.to_q.bias": 1024, + "transformer.layers.9.2.to_k.weight": 1048576, + "transformer.layers.9.2.to_k.bias": 1024, + "transformer.layers.9.2.to_v.weight": 1048576, + "transformer.layers.9.2.to_v.bias": 1024, + "transformer.layers.9.2.to_out.0.weight": 1048576, + "transformer.layers.9.2.to_out.0.bias": 1024, + "transformer.layers.9.3.g": 1024, + "transformer.layers.9.4.ff.0.0.weight": 4194304, + "transformer.layers.9.4.ff.0.0.bias": 4096, + "transformer.layers.9.4.ff.2.weight": 4194304, + "transformer.layers.9.4.ff.2.bias": 1024, + "transformer.layers.10.1.g": 1024, + "transformer.layers.10.2.to_q.weight": 1048576, + "transformer.layers.10.2.to_q.bias": 1024, + "transformer.layers.10.2.to_k.weight": 1048576, + "transformer.layers.10.2.to_k.bias": 1024, + "transformer.layers.10.2.to_v.weight": 1048576, + "transformer.layers.10.2.to_v.bias": 1024, + "transformer.layers.10.2.to_out.0.weight": 1048576, + "transformer.layers.10.2.to_out.0.bias": 1024, + "transformer.layers.10.3.g": 1024, + "transformer.layers.10.4.ff.0.0.weight": 4194304, + "transformer.layers.10.4.ff.0.0.bias": 4096, + "transformer.layers.10.4.ff.2.weight": 4194304, + "transformer.layers.10.4.ff.2.bias": 1024, + "transformer.layers.11.1.g": 1024, + "transformer.layers.11.2.to_q.weight": 1048576, + "transformer.layers.11.2.to_q.bias": 1024, + "transformer.layers.11.2.to_k.weight": 1048576, + "transformer.layers.11.2.to_k.bias": 1024, + "transformer.layers.11.2.to_v.weight": 1048576, + "transformer.layers.11.2.to_v.bias": 1024, + "transformer.layers.11.2.to_out.0.weight": 1048576, + "transformer.layers.11.2.to_out.0.bias": 1024, + "transformer.layers.11.3.g": 1024, + "transformer.layers.11.4.ff.0.0.weight": 4194304, + "transformer.layers.11.4.ff.0.0.bias": 4096, + "transformer.layers.11.4.ff.2.weight": 4194304, + "transformer.layers.11.4.ff.2.bias": 1024, + "transformer.layers.12.1.g": 1024, + "transformer.layers.12.2.to_q.weight": 1048576, + "transformer.layers.12.2.to_q.bias": 1024, + "transformer.layers.12.2.to_k.weight": 1048576, + "transformer.layers.12.2.to_k.bias": 1024, + "transformer.layers.12.2.to_v.weight": 1048576, + "transformer.layers.12.2.to_v.bias": 1024, + "transformer.layers.12.2.to_out.0.weight": 1048576, + "transformer.layers.12.2.to_out.0.bias": 1024, + "transformer.layers.12.3.g": 1024, + "transformer.layers.12.4.ff.0.0.weight": 4194304, + "transformer.layers.12.4.ff.0.0.bias": 4096, + "transformer.layers.12.4.ff.2.weight": 4194304, + "transformer.layers.12.4.ff.2.bias": 1024, + "transformer.layers.13.0.weight": 2097152, + "transformer.layers.13.1.g": 1024, + "transformer.layers.13.2.to_q.weight": 1048576, + "transformer.layers.13.2.to_q.bias": 1024, + "transformer.layers.13.2.to_k.weight": 1048576, + "transformer.layers.13.2.to_k.bias": 1024, + "transformer.layers.13.2.to_v.weight": 1048576, + "transformer.layers.13.2.to_v.bias": 1024, + "transformer.layers.13.2.to_out.0.weight": 1048576, + "transformer.layers.13.2.to_out.0.bias": 1024, + "transformer.layers.13.3.g": 1024, + "transformer.layers.13.4.ff.0.0.weight": 4194304, + "transformer.layers.13.4.ff.0.0.bias": 4096, + "transformer.layers.13.4.ff.2.weight": 4194304, + "transformer.layers.13.4.ff.2.bias": 1024, + "transformer.layers.14.0.weight": 2097152, + "transformer.layers.14.1.g": 1024, + "transformer.layers.14.2.to_q.weight": 1048576, + "transformer.layers.14.2.to_q.bias": 1024, + "transformer.layers.14.2.to_k.weight": 1048576, + "transformer.layers.14.2.to_k.bias": 1024, + "transformer.layers.14.2.to_v.weight": 1048576, + "transformer.layers.14.2.to_v.bias": 1024, + "transformer.layers.14.2.to_out.0.weight": 1048576, + "transformer.layers.14.2.to_out.0.bias": 1024, + "transformer.layers.14.3.g": 1024, + "transformer.layers.14.4.ff.0.0.weight": 4194304, + "transformer.layers.14.4.ff.0.0.bias": 4096, + "transformer.layers.14.4.ff.2.weight": 4194304, + "transformer.layers.14.4.ff.2.bias": 1024, + "transformer.layers.15.0.weight": 2097152, + "transformer.layers.15.1.g": 1024, + "transformer.layers.15.2.to_q.weight": 1048576, + "transformer.layers.15.2.to_q.bias": 1024, + "transformer.layers.15.2.to_k.weight": 1048576, + "transformer.layers.15.2.to_k.bias": 1024, + "transformer.layers.15.2.to_v.weight": 1048576, + "transformer.layers.15.2.to_v.bias": 1024, + "transformer.layers.15.2.to_out.0.weight": 1048576, + "transformer.layers.15.2.to_out.0.bias": 1024, + "transformer.layers.15.3.g": 1024, + "transformer.layers.15.4.ff.0.0.weight": 4194304, + "transformer.layers.15.4.ff.0.0.bias": 4096, + "transformer.layers.15.4.ff.2.weight": 4194304, + "transformer.layers.15.4.ff.2.bias": 1024, + "transformer.layers.16.0.weight": 2097152, + "transformer.layers.16.1.g": 1024, + "transformer.layers.16.2.to_q.weight": 1048576, + "transformer.layers.16.2.to_q.bias": 1024, + "transformer.layers.16.2.to_k.weight": 1048576, + "transformer.layers.16.2.to_k.bias": 1024, + "transformer.layers.16.2.to_v.weight": 1048576, + "transformer.layers.16.2.to_v.bias": 1024, + "transformer.layers.16.2.to_out.0.weight": 1048576, + "transformer.layers.16.2.to_out.0.bias": 1024, + "transformer.layers.16.3.g": 1024, + "transformer.layers.16.4.ff.0.0.weight": 4194304, + "transformer.layers.16.4.ff.0.0.bias": 4096, + "transformer.layers.16.4.ff.2.weight": 4194304, + "transformer.layers.16.4.ff.2.bias": 1024, + "transformer.layers.17.0.weight": 2097152, + "transformer.layers.17.1.g": 1024, + "transformer.layers.17.2.to_q.weight": 1048576, + "transformer.layers.17.2.to_q.bias": 1024, + "transformer.layers.17.2.to_k.weight": 1048576, + "transformer.layers.17.2.to_k.bias": 1024, + "transformer.layers.17.2.to_v.weight": 1048576, + "transformer.layers.17.2.to_v.bias": 1024, + "transformer.layers.17.2.to_out.0.weight": 1048576, + "transformer.layers.17.2.to_out.0.bias": 1024, + "transformer.layers.17.3.g": 1024, + "transformer.layers.17.4.ff.0.0.weight": 4194304, + "transformer.layers.17.4.ff.0.0.bias": 4096, + "transformer.layers.17.4.ff.2.weight": 4194304, + "transformer.layers.17.4.ff.2.bias": 1024, + "transformer.layers.18.0.weight": 2097152, + "transformer.layers.18.1.g": 1024, + "transformer.layers.18.2.to_q.weight": 1048576, + "transformer.layers.18.2.to_q.bias": 1024, + "transformer.layers.18.2.to_k.weight": 1048576, + "transformer.layers.18.2.to_k.bias": 1024, + "transformer.layers.18.2.to_v.weight": 1048576, + "transformer.layers.18.2.to_v.bias": 1024, + "transformer.layers.18.2.to_out.0.weight": 1048576, + "transformer.layers.18.2.to_out.0.bias": 1024, + "transformer.layers.18.3.g": 1024, + "transformer.layers.18.4.ff.0.0.weight": 4194304, + "transformer.layers.18.4.ff.0.0.bias": 4096, + "transformer.layers.18.4.ff.2.weight": 4194304, + "transformer.layers.18.4.ff.2.bias": 1024, + "transformer.layers.19.0.weight": 2097152, + "transformer.layers.19.1.g": 1024, + "transformer.layers.19.2.to_q.weight": 1048576, + "transformer.layers.19.2.to_q.bias": 1024, + "transformer.layers.19.2.to_k.weight": 1048576, + "transformer.layers.19.2.to_k.bias": 1024, + "transformer.layers.19.2.to_v.weight": 1048576, + "transformer.layers.19.2.to_v.bias": 1024, + "transformer.layers.19.2.to_out.0.weight": 1048576, + "transformer.layers.19.2.to_out.0.bias": 1024, + "transformer.layers.19.3.g": 1024, + "transformer.layers.19.4.ff.0.0.weight": 4194304, + "transformer.layers.19.4.ff.0.0.bias": 4096, + "transformer.layers.19.4.ff.2.weight": 4194304, + "transformer.layers.19.4.ff.2.bias": 1024, + "transformer.layers.20.0.weight": 2097152, + "transformer.layers.20.1.g": 1024, + "transformer.layers.20.2.to_q.weight": 1048576, + "transformer.layers.20.2.to_q.bias": 1024, + "transformer.layers.20.2.to_k.weight": 1048576, + "transformer.layers.20.2.to_k.bias": 1024, + "transformer.layers.20.2.to_v.weight": 1048576, + "transformer.layers.20.2.to_v.bias": 1024, + "transformer.layers.20.2.to_out.0.weight": 1048576, + "transformer.layers.20.2.to_out.0.bias": 1024, + "transformer.layers.20.3.g": 1024, + "transformer.layers.20.4.ff.0.0.weight": 4194304, + "transformer.layers.20.4.ff.0.0.bias": 4096, + "transformer.layers.20.4.ff.2.weight": 4194304, + "transformer.layers.20.4.ff.2.bias": 1024, + "transformer.layers.21.0.weight": 2097152, + "transformer.layers.21.1.g": 1024, + "transformer.layers.21.2.to_q.weight": 1048576, + "transformer.layers.21.2.to_q.bias": 1024, + "transformer.layers.21.2.to_k.weight": 1048576, + "transformer.layers.21.2.to_k.bias": 1024, + "transformer.layers.21.2.to_v.weight": 1048576, + "transformer.layers.21.2.to_v.bias": 1024, + "transformer.layers.21.2.to_out.0.weight": 1048576, + "transformer.layers.21.2.to_out.0.bias": 1024, + "transformer.layers.21.3.g": 1024, + "transformer.layers.21.4.ff.0.0.weight": 4194304, + "transformer.layers.21.4.ff.0.0.bias": 4096, + "transformer.layers.21.4.ff.2.weight": 4194304, + "transformer.layers.21.4.ff.2.bias": 1024, + "transformer.layers.22.0.weight": 2097152, + "transformer.layers.22.1.g": 1024, + "transformer.layers.22.2.to_q.weight": 1048576, + "transformer.layers.22.2.to_q.bias": 1024, + "transformer.layers.22.2.to_k.weight": 1048576, + "transformer.layers.22.2.to_k.bias": 1024, + "transformer.layers.22.2.to_v.weight": 1048576, + "transformer.layers.22.2.to_v.bias": 1024, + "transformer.layers.22.2.to_out.0.weight": 1048576, + "transformer.layers.22.2.to_out.0.bias": 1024, + "transformer.layers.22.3.g": 1024, + "transformer.layers.22.4.ff.0.0.weight": 4194304, + "transformer.layers.22.4.ff.0.0.bias": 4096, + "transformer.layers.22.4.ff.2.weight": 4194304, + "transformer.layers.22.4.ff.2.bias": 1024, + "transformer.layers.23.0.weight": 2097152, + "transformer.layers.23.1.g": 1024, + "transformer.layers.23.2.to_q.weight": 1048576, + "transformer.layers.23.2.to_q.bias": 1024, + "transformer.layers.23.2.to_k.weight": 1048576, + "transformer.layers.23.2.to_k.bias": 1024, + "transformer.layers.23.2.to_v.weight": 1048576, + "transformer.layers.23.2.to_v.bias": 1024, + "transformer.layers.23.2.to_out.0.weight": 1048576, + "transformer.layers.23.2.to_out.0.bias": 1024, + "transformer.layers.23.3.g": 1024, + "transformer.layers.23.4.ff.0.0.weight": 4194304, + "transformer.layers.23.4.ff.0.0.bias": 4096, + "transformer.layers.23.4.ff.2.weight": 4194304, + "transformer.layers.23.4.ff.2.bias": 1024, + "transformer.layers.24.0.weight": 2097152, + "transformer.layers.24.1.g": 1024, + "transformer.layers.24.2.to_q.weight": 1048576, + "transformer.layers.24.2.to_q.bias": 1024, + "transformer.layers.24.2.to_k.weight": 1048576, + "transformer.layers.24.2.to_k.bias": 1024, + "transformer.layers.24.2.to_v.weight": 1048576, + "transformer.layers.24.2.to_v.bias": 1024, + "transformer.layers.24.2.to_out.0.weight": 1048576, + "transformer.layers.24.2.to_out.0.bias": 1024, + "transformer.layers.24.3.g": 1024, + "transformer.layers.24.4.ff.0.0.weight": 4194304, + "transformer.layers.24.4.ff.0.0.bias": 4096, + "transformer.layers.24.4.ff.2.weight": 4194304, + "transformer.layers.24.4.ff.2.bias": 1024, + "transformer.layers.25.0.weight": 2097152, + "transformer.layers.25.1.g": 1024, + "transformer.layers.25.2.to_q.weight": 1048576, + "transformer.layers.25.2.to_q.bias": 1024, + "transformer.layers.25.2.to_k.weight": 1048576, + "transformer.layers.25.2.to_k.bias": 1024, + "transformer.layers.25.2.to_v.weight": 1048576, + "transformer.layers.25.2.to_v.bias": 1024, + "transformer.layers.25.2.to_out.0.weight": 1048576, + "transformer.layers.25.2.to_out.0.bias": 1024, + "transformer.layers.25.3.g": 1024, + "transformer.layers.25.4.ff.0.0.weight": 4194304, + "transformer.layers.25.4.ff.0.0.bias": 4096, + "transformer.layers.25.4.ff.2.weight": 4194304, + "transformer.layers.25.4.ff.2.bias": 1024, + "transformer.norm_out.g": 1024, + "transformer.proj_out.weight": 102400, + "transformer.proj_out.bias": 100 + }, + "important_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ], + "bottleneck_layers": [], + "recommendations": { + "focus_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ] + }, + "total_parameters": 391, + "total_elements": 360755948, + "param_ranges": { + "transformer.time_embed.time_mlp.0.weight": { + "min": -0.43005406856536865, + "max": 0.29851898550987244, + "mean": -0.0025509949773550034, + "std": 0.042555101215839386, + "sparsity": 0.0, + "shape": [ + 1024, + 256 + ] + }, + "transformer.time_embed.time_mlp.0.bias": { + "min": -0.06313250213861465, + "max": 0.10729768127202988, + "mean": 0.0006133262650109828, + "std": 0.03408696502447128, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.weight": { + "min": -0.41268208622932434, + "max": 0.8365541696548462, + "mean": -0.00020702443725895137, + "std": 0.02410811372101307, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.bias": { + "min": -0.11502047628164291, + "max": 0.3207014203071594, + "mean": -0.00093841488705948, + "std": 0.019534854218363762, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.text_embed.text_embed.weight": { + "min": -2.7852821350097656, + "max": 2.8634164333343506, + "mean": -0.00036539402208290994, + "std": 0.615379810333252, + "sparsity": 0.0, + "shape": [ + 2546, + 100 + ] + }, + "transformer.input_embed.proj.weight": { + "min": -0.27854230999946594, + "max": 0.38152772188186646, + "mean": 0.0004230512131471187, + "std": 0.042748332023620605, + "sparsity": 0.0, + "shape": [ + 1024, + 300 + ] + }, + "transformer.input_embed.proj.bias": { + "min": -0.22163018584251404, + "max": 0.20894938707351685, + "mean": -0.004489985294640064, + "std": 0.040880318731069565, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { + "min": -0.4279509484767914, + "max": 0.47543206810951233, + "mean": 3.1694014523964142e-06, + "std": 0.02450772561132908, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { + "min": -0.32420721650123596, + "max": 0.15700779855251312, + "mean": -0.04670684412121773, + "std": 0.051544804126024246, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { + "min": -0.4101617932319641, + "max": 0.3544142544269562, + "mean": -0.00012779857206624, + "std": 0.02359919063746929, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { + "min": -0.2289954274892807, + "max": 0.26173391938209534, + "mean": -0.029131349176168442, + "std": 0.04930002987384796, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.1.g": { + "min": 0.25456100702285767, + "max": 0.818419873714447, + "mean": 0.5253804922103882, + "std": 0.08069705218076706, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_q.weight": { + "min": -0.2965428829193115, + "max": 0.26520034670829773, + "mean": -0.00042467008461244404, + "std": 0.03210080415010452, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_q.bias": { + "min": -0.09260489046573639, + "max": 0.1250484734773636, + "mean": 0.0006493350956588984, + "std": 0.025727085769176483, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_k.weight": { + "min": -0.2901724576950073, + "max": 0.281167596578598, + "mean": -7.525501860072836e-05, + "std": 0.030932163819670677, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_k.bias": { + "min": -5.8939008712768555, + "max": 5.80875825881958, + "mean": -0.009307368658483028, + "std": 1.2948225736618042, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_v.weight": { + "min": -0.4246821701526642, + "max": 0.34353208541870117, + "mean": 9.80871482170187e-05, + "std": 0.029952067881822586, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_v.bias": { + "min": -0.02886669710278511, + "max": 0.027609167620539665, + "mean": -0.0003159984771627933, + "std": 0.01256631314754486, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.weight": { + "min": -0.4538891911506653, + "max": 0.4482215344905853, + "mean": 2.2922111384104937e-05, + "std": 0.02385348081588745, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.bias": { + "min": -0.08867117762565613, + "max": 0.09104129672050476, + "mean": 0.0022725451271981, + "std": 0.019507737830281258, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.3.g": { + "min": 0.26674631237983704, + "max": 1.054079532623291, + "mean": 0.5310790538787842, + "std": 0.10425138473510742, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.weight": { + "min": -0.5743944644927979, + "max": 0.6082407832145691, + "mean": -0.00042930786730721593, + "std": 0.03859541565179825, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.bias": { + "min": -0.18188051879405975, + "max": 0.04570186883211136, + "mean": -0.029450394213199615, + "std": 0.04259800165891647, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.0.4.ff.2.weight": { + "min": -1.1662050485610962, + "max": 1.6339434385299683, + "mean": 0.00032052083406597376, + "std": 0.027692945674061775, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.0.4.ff.2.bias": { + "min": -0.16221286356449127, + "max": 0.2055274099111557, + "mean": -0.021118517965078354, + "std": 0.027932317927479744, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.1.g": { + "min": 0.22425268590450287, + "max": 0.8419703841209412, + "mean": 0.48751628398895264, + "std": 0.0750974491238594, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_q.weight": { + "min": -0.2551511526107788, + "max": 0.30577754974365234, + "mean": -8.399176294915378e-06, + "std": 0.03346917778253555, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_q.bias": { + "min": -0.09521990269422531, + "max": 0.11036473512649536, + "mean": 6.435990508180112e-05, + "std": 0.026954451575875282, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_k.weight": { + "min": -0.2969436049461365, + "max": 0.29559123516082764, + "mean": 5.0998860388062894e-05, + "std": 0.032539013773202896, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_k.bias": { + "min": -5.159433841705322, + "max": 5.079733371734619, + "mean": -0.014565235003829002, + "std": 1.156693696975708, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_v.weight": { + "min": -0.3445141315460205, + "max": 0.3432990610599518, + "mean": 7.890153938205913e-05, + "std": 0.03005831316113472, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_v.bias": { + "min": -0.03612125664949417, + "max": 0.03314004838466644, + "mean": -0.00014305136573966593, + "std": 0.013020108453929424, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.weight": { + "min": -0.3150654435157776, + "max": 0.3748987019062042, + "mean": -2.0872395907645114e-05, + "std": 0.02405514195561409, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.bias": { + "min": -0.10531895607709885, + "max": 0.12192098051309586, + "mean": -0.0019657753873616457, + "std": 0.028842739760875702, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.3.g": { + "min": 0.3119339942932129, + "max": 1.1190955638885498, + "mean": 0.6662184000015259, + "std": 0.09769617766141891, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.weight": { + "min": -0.8722184300422668, + "max": 0.6274752616882324, + "mean": 0.0016759471036493778, + "std": 0.047436658293008804, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.bias": { + "min": -0.27076128125190735, + "max": 0.034267961978912354, + "mean": -0.046592649072408676, + "std": 0.040578801184892654, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.1.4.ff.2.weight": { + "min": -0.9206072688102722, + "max": 0.96403568983078, + "mean": 0.0010221146512776613, + "std": 0.040701672434806824, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.1.4.ff.2.bias": { + "min": -0.14442752301692963, + "max": 0.0748896598815918, + "mean": -0.009088763035833836, + "std": 0.02569626271724701, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.1.g": { + "min": 0.23972344398498535, + "max": 0.7111932635307312, + "mean": 0.44715946912765503, + "std": 0.05921364948153496, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_q.weight": { + "min": -0.27250099182128906, + "max": 0.297283798456192, + "mean": 8.777939001447521e-06, + "std": 0.03547067567706108, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_q.bias": { + "min": -0.11882907897233963, + "max": 0.1182771623134613, + "mean": 0.0007498766062781215, + "std": 0.027608048170804977, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_k.weight": { + "min": -0.2806638181209564, + "max": 0.27924486994743347, + "mean": -7.666053716093302e-05, + "std": 0.03510000556707382, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_k.bias": { + "min": -2.5072221755981445, + "max": 2.5192060470581055, + "mean": 0.026715079322457314, + "std": 0.586592435836792, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_v.weight": { + "min": -0.22091323137283325, + "max": 0.2714807987213135, + "mean": 2.762420081126038e-06, + "std": 0.030731365084648132, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_v.bias": { + "min": -0.03329985961318016, + "max": 0.031178824603557587, + "mean": 0.00011736361193470657, + "std": 0.012398799881339073, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.weight": { + "min": -0.2350921630859375, + "max": 0.23149597644805908, + "mean": 5.688454257324338e-05, + "std": 0.025696979835629463, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.bias": { + "min": -0.13562175631523132, + "max": 0.1278066188097, + "mean": -0.00549966748803854, + "std": 0.039964329451322556, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.3.g": { + "min": 0.3545263111591339, + "max": 1.1705567836761475, + "mean": 0.7105071544647217, + "std": 0.10373809188604355, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.weight": { + "min": -0.6171801686286926, + "max": 0.5549061298370361, + "mean": 0.0011606733314692974, + "std": 0.04611368104815483, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.bias": { + "min": -0.1888936311006546, + "max": 0.024856731295585632, + "mean": -0.034840360283851624, + "std": 0.028601042926311493, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.2.4.ff.2.weight": { + "min": -1.1303929090499878, + "max": 0.9700294137001038, + "mean": 0.00035928928991779685, + "std": 0.04234178736805916, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.2.4.ff.2.bias": { + "min": -0.5973078012466431, + "max": 0.06291170418262482, + "mean": -0.004878643434494734, + "std": 0.028604039922356606, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.1.g": { + "min": 0.3753509521484375, + "max": 0.9391864538192749, + "mean": 0.5924164056777954, + "std": 0.06680406630039215, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_q.weight": { + "min": -0.391277939081192, + "max": 0.36899876594543457, + "mean": 7.035685848677531e-05, + "std": 0.03718537837266922, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_q.bias": { + "min": -0.11886083334684372, + "max": 0.1363811194896698, + "mean": 0.0009265001863241196, + "std": 0.029201578348875046, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_k.weight": { + "min": -0.6185654401779175, + "max": 0.5083082914352417, + "mean": 1.5324059859267436e-05, + "std": 0.0364382304251194, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_k.bias": { + "min": -8.179115295410156, + "max": 8.780653953552246, + "mean": -0.10920821875333786, + "std": 1.697803258895874, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_v.weight": { + "min": -0.27624833583831787, + "max": 0.23940874636173248, + "mean": 5.239578240434639e-05, + "std": 0.0326123982667923, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_v.bias": { + "min": -0.05171733349561691, + "max": 0.039454903453588486, + "mean": 9.008367487695068e-05, + "std": 0.012963240966200829, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.weight": { + "min": -0.2306506633758545, + "max": 0.23440538346767426, + "mean": -2.216407301602885e-05, + "std": 0.02938910946249962, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.bias": { + "min": -0.2041204422712326, + "max": 0.1051875501871109, + "mean": -0.004020026419311762, + "std": 0.03262867406010628, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.3.g": { + "min": 0.3396590054035187, + "max": 1.0105489492416382, + "mean": 0.7007004022598267, + "std": 0.0967300534248352, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.weight": { + "min": -0.5642524361610413, + "max": 0.8327149152755737, + "mean": 0.0004152198671363294, + "std": 0.04229423776268959, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.bias": { + "min": -0.21180973947048187, + "max": 0.030382230877876282, + "mean": -0.032180484384298325, + "std": 0.02649112045764923, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.3.4.ff.2.weight": { + "min": -0.7539102435112, + "max": 0.7183676958084106, + "mean": -1.6375699487980455e-05, + "std": 0.03683510050177574, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.3.4.ff.2.bias": { + "min": -0.26317542791366577, + "max": 0.10612691938877106, + "mean": -0.003012202214449644, + "std": 0.028860073536634445, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.1.g": { + "min": 0.28410062193870544, + "max": 0.6937515735626221, + "mean": 0.49938827753067017, + "std": 0.04646085575222969, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_q.weight": { + "min": -0.27815356850624084, + "max": 0.233821839094162, + "mean": -0.00011090396583313122, + "std": 0.03875657916069031, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_q.bias": { + "min": -0.15374495089054108, + "max": 0.126325324177742, + "mean": -0.0022300099954009056, + "std": 0.033342309296131134, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_k.weight": { + "min": -0.4138854146003723, + "max": 0.6591927409172058, + "mean": -1.8888074919232167e-05, + "std": 0.03909528627991676, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_k.bias": { + "min": -4.2339067459106445, + "max": 4.718007564544678, + "mean": -0.020461430773139, + "std": 1.007363200187683, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_v.weight": { + "min": -0.2449360489845276, + "max": 0.207246333360672, + "mean": 4.3898020521737635e-05, + "std": 0.033962249755859375, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_v.bias": { + "min": -0.03454353287816048, + "max": 0.04481153190135956, + "mean": -1.8621416529640555e-05, + "std": 0.01263485848903656, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.weight": { + "min": -0.20073898136615753, + "max": 0.20600160956382751, + "mean": -2.920800579886418e-05, + "std": 0.0310201458632946, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.bias": { + "min": -0.1997092068195343, + "max": 0.11323567479848862, + "mean": -0.002894954290241003, + "std": 0.0345144160091877, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.3.g": { + "min": 0.36691704392433167, + "max": 1.0552048683166504, + "mean": 0.670504629611969, + "std": 0.06634049117565155, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.weight": { + "min": -0.39792558550834656, + "max": 0.5017094612121582, + "mean": -3.8320780731737614e-05, + "std": 0.04113030061125755, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.bias": { + "min": -0.12866847217082977, + "max": 0.026868799701333046, + "mean": -0.030530910938978195, + "std": 0.02187257632613182, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.4.4.ff.2.weight": { + "min": -0.4486997127532959, + "max": 0.4325278401374817, + "mean": 7.570705201942474e-05, + "std": 0.03489042818546295, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.4.4.ff.2.bias": { + "min": -0.26739102602005005, + "max": 0.07290376722812653, + "mean": -0.001090540667064488, + "std": 0.023126306012272835, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.1.g": { + "min": 0.28740835189819336, + "max": 0.6838006973266602, + "mean": 0.5244842767715454, + "std": 0.04748576506972313, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_q.weight": { + "min": -0.22222448885440826, + "max": 0.22337274253368378, + "mean": 1.5597350284224376e-05, + "std": 0.038948558270931244, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_q.bias": { + "min": -0.1362549066543579, + "max": 0.1092236116528511, + "mean": 0.00024021141871344298, + "std": 0.029209597036242485, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_k.weight": { + "min": -0.37488552927970886, + "max": 0.43708565831184387, + "mean": -9.820145351113752e-06, + "std": 0.039285808801651, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_k.bias": { + "min": -3.8422415256500244, + "max": 4.994611740112305, + "mean": 0.009733816608786583, + "std": 0.8449002504348755, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_v.weight": { + "min": -0.22278591990470886, + "max": 0.21995313465595245, + "mean": -2.4143082555383444e-07, + "std": 0.03440921753644943, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_v.bias": { + "min": -0.04355766996741295, + "max": 0.03580183535814285, + "mean": -0.0002584094472695142, + "std": 0.012078197672963142, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.weight": { + "min": -0.21266809105873108, + "max": 0.18842695653438568, + "mean": -1.707848787191324e-05, + "std": 0.03153562918305397, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.bias": { + "min": -0.18067854642868042, + "max": 0.12067519873380661, + "mean": -0.0023923253174871206, + "std": 0.04126231372356415, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.3.g": { + "min": 0.42283520102500916, + "max": 0.9399095773696899, + "mean": 0.6626414060592651, + "std": 0.056763265281915665, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.weight": { + "min": -0.37058448791503906, + "max": 0.4756770133972168, + "mean": -8.219464507419616e-05, + "std": 0.040889278054237366, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.bias": { + "min": -0.20835021138191223, + "max": 0.027245184406638145, + "mean": -0.03023524209856987, + "std": 0.02135040983557701, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.5.4.ff.2.weight": { + "min": -0.3404720425605774, + "max": 0.7332155108451843, + "mean": 8.202612661989406e-05, + "std": 0.03476588428020477, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.5.4.ff.2.bias": { + "min": -0.2399250864982605, + "max": 0.050362419337034225, + "mean": -0.0011862949468195438, + "std": 0.020457014441490173, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.1.g": { + "min": 0.306090772151947, + "max": 0.6522687077522278, + "mean": 0.5250887274742126, + "std": 0.0460890494287014, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_q.weight": { + "min": -0.3040372133255005, + "max": 0.21722179651260376, + "mean": 7.015860319370404e-05, + "std": 0.0394948311150074, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_q.bias": { + "min": -0.14904865622520447, + "max": 0.1309719830751419, + "mean": 0.0003389039193280041, + "std": 0.03043319098651409, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_k.weight": { + "min": -0.2568168342113495, + "max": 0.20181529223918915, + "mean": 3.114001810899936e-05, + "std": 0.039484698325395584, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_k.bias": { + "min": -2.3340678215026855, + "max": 2.373654365539551, + "mean": -0.026232335716485977, + "std": 0.4496069550514221, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_v.weight": { + "min": -0.18832948803901672, + "max": 0.2102191150188446, + "mean": 3.7190951843513176e-05, + "std": 0.03479335457086563, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_v.bias": { + "min": -0.03177480027079582, + "max": 0.03555988520383835, + "mean": -0.00019898739992640913, + "std": 0.012286651879549026, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.weight": { + "min": -0.1882997751235962, + "max": 0.16997897624969482, + "mean": -6.833271618233994e-05, + "std": 0.03217003867030144, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.bias": { + "min": -0.13938407599925995, + "max": 0.1373613476753235, + "mean": -0.0025095485616475344, + "std": 0.051287971436977386, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.3.g": { + "min": 0.4670821726322174, + "max": 0.9539185762405396, + "mean": 0.6688235998153687, + "std": 0.05267348513007164, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.weight": { + "min": -0.3240530490875244, + "max": 0.30894580483436584, + "mean": -9.802424756344408e-07, + "std": 0.04094521328806877, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.bias": { + "min": -0.12482603639364243, + "max": 0.025560826063156128, + "mean": -0.030691375955939293, + "std": 0.01981331594288349, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.6.4.ff.2.weight": { + "min": -0.4391370117664337, + "max": 0.4447336196899414, + "mean": 9.505114576313645e-05, + "std": 0.03511868044734001, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.6.4.ff.2.bias": { + "min": -0.22435998916625977, + "max": 0.051745057106018066, + "mean": -0.0011790611315518618, + "std": 0.018466567620635033, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.1.g": { + "min": 0.339127779006958, + "max": 0.7379522323608398, + "mean": 0.5586450695991516, + "std": 0.041346412152051926, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_q.weight": { + "min": -0.27276721596717834, + "max": 0.2783542275428772, + "mean": 2.0316545487730764e-05, + "std": 0.04105677455663681, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_q.bias": { + "min": -0.13677620887756348, + "max": 0.13981792330741882, + "mean": 0.0004895473830401897, + "std": 0.026616644114255905, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_k.weight": { + "min": -0.4901849925518036, + "max": 0.3555382788181305, + "mean": 8.898908708943054e-05, + "std": 0.04069453105330467, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_k.bias": { + "min": -2.2957122325897217, + "max": 1.7441315650939941, + "mean": -0.02107611857354641, + "std": 0.5000779628753662, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_v.weight": { + "min": -0.2175905406475067, + "max": 0.19755098223686218, + "mean": -4.055129102198407e-05, + "std": 0.03423253819346428, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_v.bias": { + "min": -0.041273877024650574, + "max": 0.038862332701683044, + "mean": -0.0001397906889906153, + "std": 0.012886369600892067, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.weight": { + "min": -0.17747005820274353, + "max": 0.1828984022140503, + "mean": 4.791315950569697e-05, + "std": 0.03155587986111641, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.bias": { + "min": -0.17983144521713257, + "max": 0.1835365742444992, + "mean": -0.0022142226807773113, + "std": 0.054839469492435455, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.3.g": { + "min": 0.4742608368396759, + "max": 1.0234043598175049, + "mean": 0.645187497138977, + "std": 0.050187092274427414, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.weight": { + "min": -0.2714308202266693, + "max": 0.3094487190246582, + "mean": 0.00011228019138798118, + "std": 0.04068155214190483, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.bias": { + "min": -0.1052371934056282, + "max": 0.026651456952095032, + "mean": -0.029516855254769325, + "std": 0.017926618456840515, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.7.4.ff.2.weight": { + "min": -0.33875298500061035, + "max": 0.3289111852645874, + "mean": 5.248367233434692e-05, + "std": 0.03441265597939491, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.7.4.ff.2.bias": { + "min": -0.1814928501844406, + "max": 0.04225185513496399, + "mean": -0.0010585930431261659, + "std": 0.017206743359565735, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.1.g": { + "min": 0.325328528881073, + "max": 0.6851887106895447, + "mean": 0.5111891627311707, + "std": 0.03689680993556976, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_q.weight": { + "min": -0.2336086481809616, + "max": 0.2251969277858734, + "mean": -3.625164390541613e-05, + "std": 0.039176031947135925, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_q.bias": { + "min": -0.11540839821100235, + "max": 0.13177232444286346, + "mean": 0.00015377491945400834, + "std": 0.029171116650104523, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_k.weight": { + "min": -0.35232973098754883, + "max": 0.2849805951118469, + "mean": 6.946377197891707e-06, + "std": 0.0392446406185627, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_k.bias": { + "min": -4.128444194793701, + "max": 3.5404324531555176, + "mean": -0.011580632999539375, + "std": 0.6822744011878967, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_v.weight": { + "min": -0.21085655689239502, + "max": 0.20925314724445343, + "mean": 3.461689630057663e-05, + "std": 0.03448476642370224, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_v.bias": { + "min": -0.03582029417157173, + "max": 0.0481770783662796, + "mean": 0.000791961036156863, + "std": 0.012865905649960041, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.weight": { + "min": -0.2102348804473877, + "max": 0.19295428693294525, + "mean": -1.266141225642059e-06, + "std": 0.03169584646821022, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.bias": { + "min": -0.18637949228286743, + "max": 0.17694726586341858, + "mean": -0.0028348618652671576, + "std": 0.058624111115932465, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.3.g": { + "min": 0.47455769777297974, + "max": 1.0399035215377808, + "mean": 0.6513059735298157, + "std": 0.049517374485731125, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.weight": { + "min": -0.2480839341878891, + "max": 0.32886141538619995, + "mean": 0.00018076057313010097, + "std": 0.040569957345724106, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.bias": { + "min": -0.12484849989414215, + "max": 0.024815550073981285, + "mean": -0.030500907450914383, + "std": 0.01760847680270672, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.8.4.ff.2.weight": { + "min": -0.42022550106048584, + "max": 0.4810453951358795, + "mean": -1.3774351828033105e-06, + "std": 0.03539680689573288, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.8.4.ff.2.bias": { + "min": -0.15139424800872803, + "max": 0.04337864741683006, + "mean": 4.9671380111249164e-05, + "std": 0.014884358271956444, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.1.g": { + "min": 0.3155671954154968, + "max": 0.6806262135505676, + "mean": 0.5528896450996399, + "std": 0.04069091007113457, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_q.weight": { + "min": -0.20614612102508545, + "max": 0.2194698005914688, + "mean": 3.180014027748257e-05, + "std": 0.038299210369586945, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_q.bias": { + "min": -0.13776730000972748, + "max": 0.11263402551412582, + "mean": 2.7509784558787942e-05, + "std": 0.02582019381225109, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_k.weight": { + "min": -0.4022030830383301, + "max": 0.3703415095806122, + "mean": 2.5775392714422196e-05, + "std": 0.03817988187074661, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_k.bias": { + "min": -3.767340898513794, + "max": 2.8659963607788086, + "mean": 0.0011514686048030853, + "std": 0.5165835022926331, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_v.weight": { + "min": -0.20330490171909332, + "max": 0.1975128948688507, + "mean": 2.9661892767762765e-05, + "std": 0.03429696336388588, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_v.bias": { + "min": -0.05067470669746399, + "max": 0.03985888883471489, + "mean": -0.0004201547708362341, + "std": 0.013416973873972893, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.weight": { + "min": -0.19610381126403809, + "max": 0.20185545086860657, + "mean": -1.2482038982852828e-05, + "std": 0.031804922968149185, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.bias": { + "min": -0.19282294809818268, + "max": 0.19485345482826233, + "mean": -0.0029612130019813776, + "std": 0.06253436952829361, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.3.g": { + "min": 0.3490590453147888, + "max": 1.081492304801941, + "mean": 0.6670613884925842, + "std": 0.05502287670969963, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.weight": { + "min": -0.22548414766788483, + "max": 0.2509278655052185, + "mean": 0.00035874126479029655, + "std": 0.04075963795185089, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.bias": { + "min": -0.0911286398768425, + "max": 0.043736688792705536, + "mean": -0.03008149564266205, + "std": 0.017609886825084686, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.9.4.ff.2.weight": { + "min": -0.3527411222457886, + "max": 0.30355900526046753, + "mean": -4.3905802158406004e-05, + "std": 0.037122152745723724, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.9.4.ff.2.bias": { + "min": -0.16155573725700378, + "max": 0.06323426961898804, + "mean": -8.016945503186435e-05, + "std": 0.019409824162721634, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.1.g": { + "min": 0.34882256388664246, + "max": 0.7205829620361328, + "mean": 0.5423275232315063, + "std": 0.03903055191040039, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_q.weight": { + "min": -0.21910026669502258, + "max": 0.2230084389448166, + "mean": -1.1230863492528442e-05, + "std": 0.03923042118549347, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_q.bias": { + "min": -0.11831706017255783, + "max": 0.17028944194316864, + "mean": 0.0002854751655831933, + "std": 0.02510806918144226, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_k.weight": { + "min": -0.24612674117088318, + "max": 0.3002479076385498, + "mean": -3.693345206556842e-05, + "std": 0.03892989829182625, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_k.bias": { + "min": -3.501706838607788, + "max": 3.7109532356262207, + "mean": 0.015846284106373787, + "std": 0.7818700075149536, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_v.weight": { + "min": -0.2186352014541626, + "max": 0.2372058928012848, + "mean": -1.3363219295570161e-05, + "std": 0.03630276769399643, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_v.bias": { + "min": -0.04711708053946495, + "max": 0.05125221982598305, + "mean": 0.00047675782116129994, + "std": 0.013513283804059029, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.weight": { + "min": -0.2137574851512909, + "max": 0.2170482724905014, + "mean": 5.6474542361684144e-05, + "std": 0.033615030348300934, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.bias": { + "min": -0.21112386882305145, + "max": 0.23111283779144287, + "mean": -0.005101324524730444, + "std": 0.06186835095286369, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.3.g": { + "min": 0.36194419860839844, + "max": 1.0987720489501953, + "mean": 0.6991980671882629, + "std": 0.05339714512228966, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.weight": { + "min": -0.23452329635620117, + "max": 0.24459832906723022, + "mean": 0.0004634420620277524, + "std": 0.041268572211265564, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.bias": { + "min": -0.09795372933149338, + "max": 0.0681690126657486, + "mean": -0.031430941075086594, + "std": 0.018122123554348946, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.10.4.ff.2.weight": { + "min": -0.3014773726463318, + "max": 0.3510685861110687, + "mean": -8.210168743971735e-05, + "std": 0.04027429223060608, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.10.4.ff.2.bias": { + "min": -0.15211886167526245, + "max": 0.14952634274959564, + "mean": 0.0002581052831374109, + "std": 0.023030627518892288, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.1.g": { + "min": 0.9992543458938599, + "max": 1.000257968902588, + "mean": 0.9997284412384033, + "std": 0.00024261184444185346, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_q.weight": { + "min": -0.031257662922143936, + "max": 0.03125471994280815, + "mean": -1.929123027366586e-05, + "std": 0.018041206523776054, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_q.bias": { + "min": -0.03122766688466072, + "max": 0.030988017097115517, + "mean": -0.0010841797338798642, + "std": 0.01795079931616783, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_k.weight": { + "min": -0.031254444271326065, + "max": 0.031258873641490936, + "mean": 3.5479256439430173e-06, + "std": 0.018041614443063736, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_k.bias": { + "min": -0.031154906377196312, + "max": 0.03117496706545353, + "mean": 0.0003339025133755058, + "std": 0.018063001334667206, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.bias": { + "min": -0.0006141028716228902, + "max": 0.0004136512288823724, + "mean": 1.3743268709731638e-06, + "std": 0.0001376789587084204, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.3.g": { + "min": 0.9981284141540527, + "max": 1.001622200012207, + "mean": 0.9998474717140198, + "std": 0.0006079401355236769, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.weight": { + "min": -0.032770540565252304, + "max": 0.032834719866514206, + "mean": -6.686397682642564e-06, + "std": 0.01804281771183014, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.bias": { + "min": -0.032758843153715134, + "max": 0.03259320184588432, + "mean": -0.00013118298375047743, + "std": 0.017956331372261047, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.11.4.ff.2.weight": { + "min": -0.001173654804006219, + "max": 0.0011514672078192234, + "mean": 3.6397079838934587e-07, + "std": 0.00021431130880955607, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.11.4.ff.2.bias": { + "min": -0.0005246364744380116, + "max": 0.000398451229557395, + "mean": 2.265020839331555e-06, + "std": 0.0001267467887373641, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.1.g": { + "min": 0.38304001092910767, + "max": 0.717822790145874, + "mean": 0.5806512236595154, + "std": 0.03879348561167717, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_q.weight": { + "min": -0.2381902039051056, + "max": 0.1962050199508667, + "mean": 2.6112733394256793e-05, + "std": 0.03746553510427475, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_q.bias": { + "min": -0.11878937482833862, + "max": 0.16630207002162933, + "mean": 0.0009804379660636187, + "std": 0.027551008388400078, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_k.weight": { + "min": -0.24597673118114471, + "max": 0.499647855758667, + "mean": -5.027425504522398e-05, + "std": 0.03762295842170715, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_k.bias": { + "min": -3.9381461143493652, + "max": 3.7654519081115723, + "mean": -0.003569968044757843, + "std": 0.6810594201087952, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_v.weight": { + "min": -0.22724951803684235, + "max": 0.25177428126335144, + "mean": -1.1575086318771355e-05, + "std": 0.037434518337249756, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_v.bias": { + "min": -0.07160108536481857, + "max": 0.08055920898914337, + "mean": -0.0005123723531141877, + "std": 0.015660181641578674, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.weight": { + "min": -0.22791653871536255, + "max": 0.25741860270500183, + "mean": -2.8733527869917452e-05, + "std": 0.035421404987573624, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.bias": { + "min": -0.20038263499736786, + "max": 0.21485595405101776, + "mean": -0.005531632341444492, + "std": 0.06833721697330475, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.3.g": { + "min": 0.4051814377307892, + "max": 1.186793327331543, + "mean": 0.7378474473953247, + "std": 0.055015575140714645, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.weight": { + "min": -0.2207704335451126, + "max": 0.24539422988891602, + "mean": 0.0005212163086980581, + "std": 0.04133594036102295, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.bias": { + "min": -0.10323301702737808, + "max": 0.02423531748354435, + "mean": -0.03266426920890808, + "std": 0.018886635079979897, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.12.4.ff.2.weight": { + "min": -0.44897761940956116, + "max": 0.42180517315864563, + "mean": -0.0004341494059190154, + "std": 0.04689624160528183, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.12.4.ff.2.bias": { + "min": -0.25117069482803345, + "max": 0.46963006258010864, + "mean": 0.003201500279828906, + "std": 0.044517986476421356, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.0.weight": { + "min": -0.3168058395385742, + "max": 0.3330129086971283, + "mean": -2.5202643882948905e-05, + "std": 0.021287493407726288, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.13.1.g": { + "min": 0.32449325919151306, + "max": 0.6839006543159485, + "mean": 0.5709657073020935, + "std": 0.04467146471142769, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_q.weight": { + "min": -0.16424405574798584, + "max": 0.1741371899843216, + "mean": -4.883421570411883e-05, + "std": 0.033180903643369675, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_q.bias": { + "min": -0.18656986951828003, + "max": 0.14275068044662476, + "mean": 4.2517087422311306e-05, + "std": 0.029676001518964767, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_k.weight": { + "min": -0.3805179297924042, + "max": 0.24586445093154907, + "mean": -9.98385530692758e-06, + "std": 0.03276193141937256, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_k.bias": { + "min": -3.6520333290100098, + "max": 3.2866697311401367, + "mean": -0.01423930749297142, + "std": 0.984977662563324, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_v.weight": { + "min": -0.23466402292251587, + "max": 0.24725867807865143, + "mean": -1.800561039999593e-05, + "std": 0.04169729724526405, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_v.bias": { + "min": -0.07259472459554672, + "max": 0.15434128046035767, + "mean": 0.0006652789888903499, + "std": 0.02516855113208294, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.weight": { + "min": -0.2662595510482788, + "max": 0.24813267588615417, + "mean": -1.5347548469435424e-05, + "std": 0.04013809189200401, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.bias": { + "min": -0.18939754366874695, + "max": 0.19454091787338257, + "mean": -0.0012339097447693348, + "std": 0.06667902320623398, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.3.g": { + "min": 0.32912713289260864, + "max": 0.9980567097663879, + "mean": 0.7191190719604492, + "std": 0.05222564935684204, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.weight": { + "min": -0.23154447972774506, + "max": 0.2451959252357483, + "mean": 0.00018269156862515956, + "std": 0.04089995473623276, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.bias": { + "min": -0.11424808949232101, + "max": 0.01902252808213234, + "mean": -0.04247482866048813, + "std": 0.018848657608032227, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.13.4.ff.2.weight": { + "min": -0.3893679976463318, + "max": 0.4069530963897705, + "mean": -2.1458035917021334e-05, + "std": 0.04853350669145584, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.13.4.ff.2.bias": { + "min": -0.6924692392349243, + "max": 0.4121605455875397, + "mean": 0.0008477990049868822, + "std": 0.06026294827461243, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.0.weight": { + "min": -0.0010412124684080482, + "max": 1.00050687789917, + "mean": 0.00048820613301359117, + "std": 0.02208906039595604, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.14.1.g": { + "min": 0.9985182881355286, + "max": 1.000278115272522, + "mean": 0.9996296167373657, + "std": 0.0004832371196243912, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_q.weight": { + "min": -0.03125324100255966, + "max": 0.03125615417957306, + "mean": -2.1021265638410114e-05, + "std": 0.01803254708647728, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_q.bias": { + "min": -0.03121461719274521, + "max": 0.031231539323925972, + "mean": -0.0006769909523427486, + "std": 0.017827048897743225, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_k.weight": { + "min": -0.03125639632344246, + "max": 0.031260956078767776, + "mean": -8.831522791297175e-06, + "std": 0.018031572923064232, + "sparsity": 9.5367431640625e-07, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_k.bias": { + "min": -0.03123198263347149, + "max": 0.031244853511452675, + "mean": -0.0007297562551684678, + "std": 0.017941949889063835, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.bias": { + "min": -0.0004176551883574575, + "max": 0.0003318839881103486, + "mean": -3.140859689665376e-06, + "std": 0.00011632459791144356, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.3.g": { + "min": 0.9979198575019836, + "max": 1.0014318227767944, + "mean": 0.9994964599609375, + "std": 0.0006108160014264286, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.weight": { + "min": -0.03245294839143753, + "max": 0.032378438860177994, + "mean": -1.7318175196123775e-06, + "std": 0.018028022721409798, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.bias": { + "min": -0.03213566541671753, + "max": 0.03115900792181492, + "mean": -0.0003739359090104699, + "std": 0.018043629825115204, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.14.4.ff.2.weight": { + "min": -0.0012771300971508026, + "max": 0.0011123745935037732, + "mean": -8.958944022197102e-07, + "std": 0.00020973320351913571, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.14.4.ff.2.bias": { + "min": -0.00034164811950176954, + "max": 0.0002967154432553798, + "mean": -3.7618522128468612e-06, + "std": 0.00010472961730556563, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.0.weight": { + "min": -0.2341979742050171, + "max": 0.27227067947387695, + "mean": 6.760874839528697e-06, + "std": 0.01880943961441517, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.15.1.g": { + "min": 0.32133588194847107, + "max": 0.6926518678665161, + "mean": 0.5816141963005066, + "std": 0.04592034965753555, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_q.weight": { + "min": -0.1816624104976654, + "max": 0.19737666845321655, + "mean": -1.1567326509975828e-05, + "std": 0.03318365663290024, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_q.bias": { + "min": -0.16045045852661133, + "max": 0.12930794060230255, + "mean": -0.0010751842055469751, + "std": 0.03413202986121178, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_k.weight": { + "min": -0.3320204019546509, + "max": 0.31095007061958313, + "mean": -1.016673104459187e-05, + "std": 0.032234374433755875, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_k.bias": { + "min": -7.795230388641357, + "max": 8.753500938415527, + "mean": 0.09339793026447296, + "std": 1.6184653043746948, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_v.weight": { + "min": -0.23359645903110504, + "max": 0.2416210174560547, + "mean": 4.149888991378248e-05, + "std": 0.04085618630051613, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_v.bias": { + "min": -0.07583926618099213, + "max": 0.06566201150417328, + "mean": 0.0004832554841414094, + "std": 0.01940709352493286, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.weight": { + "min": -0.24546822905540466, + "max": 0.23373769223690033, + "mean": -3.0527116905432194e-06, + "std": 0.03943083807826042, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.bias": { + "min": -0.16301113367080688, + "max": 0.16089561581611633, + "mean": 0.0016276519745588303, + "std": 0.06527570635080338, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.3.g": { + "min": 0.556946873664856, + "max": 0.9415686726570129, + "mean": 0.7127838134765625, + "std": 0.03996752202510834, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.weight": { + "min": -0.22765818238258362, + "max": 0.25477662682533264, + "mean": -4.5632557885255665e-05, + "std": 0.04057467356324196, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.bias": { + "min": -0.1348292976617813, + "max": 0.022138668224215508, + "mean": -0.04134812578558922, + "std": 0.01838543266057968, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.15.4.ff.2.weight": { + "min": -0.42094686627388, + "max": 0.3921053111553192, + "mean": -4.4014304876327515e-06, + "std": 0.04778384044766426, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.15.4.ff.2.bias": { + "min": -0.6069029569625854, + "max": 0.6509266495704651, + "mean": 0.0015840512933209538, + "std": 0.05682184174656868, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.0.weight": { + "min": -0.25153595209121704, + "max": 0.320549339056015, + "mean": -6.0848738030472305e-06, + "std": 0.019612807780504227, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.16.1.g": { + "min": 0.35961607098579407, + "max": 0.6813214421272278, + "mean": 0.570705771446228, + "std": 0.04296967759728432, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_q.weight": { + "min": -0.22012382745742798, + "max": 0.17660681903362274, + "mean": -3.47153763868846e-05, + "std": 0.03429870679974556, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_q.bias": { + "min": -0.1630830317735672, + "max": 0.23280400037765503, + "mean": 0.00036220261245034635, + "std": 0.03281139209866524, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_k.weight": { + "min": -0.263581246137619, + "max": 0.23967352509498596, + "mean": -5.2856208640150726e-05, + "std": 0.03389754518866539, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_k.bias": { + "min": -4.849710464477539, + "max": 5.085712909698486, + "mean": 0.043873172253370285, + "std": 1.2286995649337769, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_v.weight": { + "min": -0.24600939452648163, + "max": 0.25006523728370667, + "mean": 7.234106305986643e-05, + "std": 0.04398686811327934, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_v.bias": { + "min": -0.06254445016384125, + "max": 0.054417435079813004, + "mean": 0.0006422345177270472, + "std": 0.017186632379889488, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.weight": { + "min": -0.28586557507514954, + "max": 0.2718929648399353, + "mean": -5.018173033022322e-05, + "std": 0.0429849736392498, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.bias": { + "min": -0.1608622968196869, + "max": 0.17021305859088898, + "mean": -0.0028866538777947426, + "std": 0.05928993597626686, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.3.g": { + "min": 0.519731879234314, + "max": 0.9308202266693115, + "mean": 0.7133743166923523, + "std": 0.03828318044543266, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.weight": { + "min": -0.23790688812732697, + "max": 0.24848711490631104, + "mean": 0.00046475647832266986, + "std": 0.04045366868376732, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.bias": { + "min": -0.14495447278022766, + "max": 0.04111183062195778, + "mean": -0.039693139493465424, + "std": 0.020540453493595123, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.16.4.ff.2.weight": { + "min": -0.5317410826683044, + "max": 0.581489622592926, + "mean": 5.736372258979827e-06, + "std": 0.04885946586728096, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.16.4.ff.2.bias": { + "min": -0.5184876322746277, + "max": 0.4928899109363556, + "mean": 0.002365314168855548, + "std": 0.05342720076441765, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.0.weight": { + "min": -0.27367857098579407, + "max": 0.3154536187648773, + "mean": 2.0265892999304924e-06, + "std": 0.020049458369612694, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.17.1.g": { + "min": 0.36605367064476013, + "max": 0.7104601860046387, + "mean": 0.5931398272514343, + "std": 0.04595194756984711, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_q.weight": { + "min": -0.21068720519542694, + "max": 0.19896060228347778, + "mean": 3.061807728954591e-05, + "std": 0.03486604616045952, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_q.bias": { + "min": -0.18698948621749878, + "max": 0.20358456671237946, + "mean": 0.0009543596534058452, + "std": 0.03149386867880821, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_k.weight": { + "min": -0.2894982397556305, + "max": 0.339619904756546, + "mean": -4.7122804971877486e-05, + "std": 0.034586917608976364, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_k.bias": { + "min": -3.8732118606567383, + "max": 3.3837733268737793, + "mean": 0.014458216726779938, + "std": 0.8580982089042664, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_v.weight": { + "min": -0.224315345287323, + "max": 0.24964982271194458, + "mean": -3.871130957122659e-06, + "std": 0.042229585349559784, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_v.bias": { + "min": -0.055275678634643555, + "max": 0.04663092643022537, + "mean": -1.647317549213767e-05, + "std": 0.015846259891986847, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.weight": { + "min": -0.2928326427936554, + "max": 0.29024964570999146, + "mean": -7.346136044361629e-06, + "std": 0.04194441810250282, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.bias": { + "min": -0.12488731741905212, + "max": 0.2587108016014099, + "mean": -0.0032421478535979986, + "std": 0.05317580699920654, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.3.g": { + "min": 0.4563259780406952, + "max": 0.8424069881439209, + "mean": 0.7054323554039001, + "std": 0.03509839251637459, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.weight": { + "min": -0.5117396712303162, + "max": 0.34794938564300537, + "mean": 0.00034281908301636577, + "std": 0.04019879177212715, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.bias": { + "min": -0.1857415735721588, + "max": 0.03958635777235031, + "mean": -0.03938839212059975, + "std": 0.021348465234041214, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.17.4.ff.2.weight": { + "min": -0.5434486865997314, + "max": 0.5551662445068359, + "mean": -7.160313543863595e-05, + "std": 0.050734180957078934, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.17.4.ff.2.bias": { + "min": -0.5112110376358032, + "max": 0.6635048389434814, + "mean": 0.002443352248519659, + "std": 0.04949941858649254, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.0.weight": { + "min": -0.3325079083442688, + "max": 0.2651371359825134, + "mean": 3.4327572393522132e-06, + "std": 0.019386671483516693, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.18.1.g": { + "min": 0.3219457268714905, + "max": 0.7650159597396851, + "mean": 0.6510248780250549, + "std": 0.04531543329358101, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_q.weight": { + "min": -0.24919819831848145, + "max": 0.21938340365886688, + "mean": -2.0984125512768514e-06, + "std": 0.03650059178471565, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_q.bias": { + "min": -0.32654333114624023, + "max": 0.2866538465023041, + "mean": -0.0006891752709634602, + "std": 0.03852362558245659, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_k.weight": { + "min": -0.30977994203567505, + "max": 0.36965611577033997, + "mean": 6.506919453386217e-05, + "std": 0.03624110668897629, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_k.bias": { + "min": -4.713971138000488, + "max": 5.803556442260742, + "mean": 0.03793709725141525, + "std": 1.412732481956482, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_v.weight": { + "min": -0.22124992311000824, + "max": 0.20528917014598846, + "mean": -7.50878534745425e-05, + "std": 0.042485084384679794, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_v.bias": { + "min": -0.07763200253248215, + "max": 0.05141681060194969, + "mean": -0.0009281833190470934, + "std": 0.01641252264380455, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.weight": { + "min": -0.33066344261169434, + "max": 0.32909321784973145, + "mean": -4.5878937271481846e-06, + "std": 0.04279147461056709, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.bias": { + "min": -0.2844299376010895, + "max": 0.1119050681591034, + "mean": -0.001205054228194058, + "std": 0.0470142662525177, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.3.g": { + "min": 0.48612144589424133, + "max": 0.8848820328712463, + "mean": 0.7373377084732056, + "std": 0.03814017400145531, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.weight": { + "min": -0.36209484934806824, + "max": 0.2740732431411743, + "mean": 5.125169991515577e-05, + "std": 0.04064430668950081, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.bias": { + "min": -0.2473653107881546, + "max": 0.046401649713516235, + "mean": -0.03926541656255722, + "std": 0.02327280305325985, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.18.4.ff.2.weight": { + "min": -0.6253157258033752, + "max": 0.5961773991584778, + "mean": -6.133734132163227e-05, + "std": 0.0531163364648819, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.18.4.ff.2.bias": { + "min": -0.7087676525115967, + "max": 0.2656005322933197, + "mean": 0.0009179539047181606, + "std": 0.05120791867375374, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.0.weight": { + "min": -0.3432927131652832, + "max": 0.3036082684993744, + "mean": 1.7233912785741268e-07, + "std": 0.01913507841527462, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.19.1.g": { + "min": 0.34983396530151367, + "max": 0.78127521276474, + "mean": 0.6388033628463745, + "std": 0.04922258108854294, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_q.weight": { + "min": -0.20482076704502106, + "max": 0.20643775165081024, + "mean": -5.993415470584296e-05, + "std": 0.037695497274398804, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_q.bias": { + "min": -0.2582729458808899, + "max": 0.2677401304244995, + "mean": -0.0004000938788522035, + "std": 0.04457787051796913, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_k.weight": { + "min": -0.3535555303096771, + "max": 0.3218846917152405, + "mean": -7.005222414591117e-06, + "std": 0.03720390424132347, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_k.bias": { + "min": -5.2560133934021, + "max": 4.200046062469482, + "mean": -0.026399940252304077, + "std": 1.0062882900238037, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_v.weight": { + "min": -0.2381831258535385, + "max": 0.24307270348072052, + "mean": -2.52762038144283e-05, + "std": 0.0432097353041172, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_v.bias": { + "min": -0.0622570626437664, + "max": 0.05666593089699745, + "mean": 0.0003454152902122587, + "std": 0.014151728712022305, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.weight": { + "min": -0.43709275126457214, + "max": 0.37350907921791077, + "mean": 1.4359582564793527e-05, + "std": 0.04412123188376427, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.bias": { + "min": -0.09637399762868881, + "max": 0.17579396069049835, + "mean": -0.00066028768196702, + "std": 0.035156894475221634, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.3.g": { + "min": 0.4216686189174652, + "max": 1.067047357559204, + "mean": 0.7483223080635071, + "std": 0.04198553413152695, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.weight": { + "min": -0.26631179451942444, + "max": 0.2965000867843628, + "mean": -7.944944081827998e-05, + "std": 0.040804266929626465, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.bias": { + "min": -0.1849687099456787, + "max": 0.04366198182106018, + "mean": -0.03681465983390808, + "std": 0.025593994185328484, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.19.4.ff.2.weight": { + "min": -0.4571255147457123, + "max": 0.4859236776828766, + "mean": 4.341108797234483e-05, + "std": 0.05420951172709465, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.19.4.ff.2.bias": { + "min": -0.28613921999931335, + "max": 0.5508683919906616, + "mean": -0.0008792161825112998, + "std": 0.04781510680913925, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.0.weight": { + "min": -0.2926841676235199, + "max": 0.3227182626724243, + "mean": 6.155195478640962e-06, + "std": 0.019968634471297264, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.20.1.g": { + "min": 0.29101473093032837, + "max": 0.7585480213165283, + "mean": 0.6508181095123291, + "std": 0.05212597921490669, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_q.weight": { + "min": -0.24345757067203522, + "max": 0.2612913250923157, + "mean": -6.02660793447285e-06, + "std": 0.03961166366934776, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_q.bias": { + "min": -0.2671639025211334, + "max": 0.19983193278312683, + "mean": -0.0008803074015304446, + "std": 0.05174032971262932, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_k.weight": { + "min": -0.2718494236469269, + "max": 0.25337839126586914, + "mean": 4.495690518524498e-06, + "std": 0.0387086495757103, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_k.bias": { + "min": -12.951557159423828, + "max": 15.930760383605957, + "mean": 0.03321323171257973, + "std": 1.9877210855484009, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_v.weight": { + "min": -0.2069142907857895, + "max": 0.225667342543602, + "mean": -7.223337888717651e-05, + "std": 0.04055356606841087, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_v.bias": { + "min": -0.06923694908618927, + "max": 0.06314270943403244, + "mean": 0.00015547810471616685, + "std": 0.0147401699796319, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.weight": { + "min": -0.4649372100830078, + "max": 0.3204408884048462, + "mean": 1.968499054783024e-05, + "std": 0.04058866575360298, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.bias": { + "min": -0.06409196555614471, + "max": 0.11513285338878632, + "mean": 0.0011910968460142612, + "std": 0.024711282923817635, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.3.g": { + "min": 0.374662309885025, + "max": 0.9300851821899414, + "mean": 0.7508615255355835, + "std": 0.04013195261359215, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.weight": { + "min": -0.2791317403316498, + "max": 0.2725660502910614, + "mean": -0.00016837481234688312, + "std": 0.040994856506586075, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.bias": { + "min": -0.1984652727842331, + "max": 0.05115879327058792, + "mean": -0.03202404826879501, + "std": 0.02509358339011669, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.20.4.ff.2.weight": { + "min": -0.6568311452865601, + "max": 0.5346067547798157, + "mean": -4.890329364570789e-05, + "std": 0.052846092730760574, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.20.4.ff.2.bias": { + "min": -0.19282352924346924, + "max": 0.5817168354988098, + "mean": -0.0005141475703567266, + "std": 0.04106360301375389, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.0.weight": { + "min": -0.41765207052230835, + "max": 0.3718544840812683, + "mean": 6.159986696729902e-06, + "std": 0.02162080444395542, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.21.1.g": { + "min": 0.21428614854812622, + "max": 0.7470263838768005, + "mean": 0.6495206356048584, + "std": 0.05435969680547714, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_q.weight": { + "min": -0.20919783413410187, + "max": 0.19538012146949768, + "mean": 4.023606743430719e-05, + "std": 0.03946175053715706, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_q.bias": { + "min": -0.32906630635261536, + "max": 0.25917014479637146, + "mean": -0.003227022010833025, + "std": 0.05624230206012726, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_k.weight": { + "min": -0.20558328926563263, + "max": 0.2543526589870453, + "mean": 5.4226169595494866e-05, + "std": 0.038564346730709076, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_k.bias": { + "min": -6.239154815673828, + "max": 6.927591800689697, + "mean": 0.04829341918230057, + "std": 1.3845902681350708, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_v.weight": { + "min": -0.20949970185756683, + "max": 0.22989487648010254, + "mean": -5.106569460622268e-06, + "std": 0.0413125716149807, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_v.bias": { + "min": -0.04377944767475128, + "max": 0.035965293645858765, + "mean": 6.696500349789858e-07, + "std": 0.012799888849258423, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.weight": { + "min": -0.39747685194015503, + "max": 0.3446802794933319, + "mean": -5.5516902648378164e-05, + "std": 0.0423889197409153, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.bias": { + "min": -0.05503125116229057, + "max": 0.06271757930517197, + "mean": 0.00036430457839742303, + "std": 0.018672339618206024, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.3.g": { + "min": 0.35033905506134033, + "max": 1.0429264307022095, + "mean": 0.7893730998039246, + "std": 0.048677314072847366, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.weight": { + "min": -0.3334490656852722, + "max": 0.38581615686416626, + "mean": -0.00016950252756942064, + "std": 0.0414799265563488, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.bias": { + "min": -0.1571725308895111, + "max": 0.059094030410051346, + "mean": -0.031832072883844376, + "std": 0.025125639513134956, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.21.4.ff.2.weight": { + "min": -0.6957246661186218, + "max": 0.4681403636932373, + "mean": -8.918362436816096e-05, + "std": 0.051792457699775696, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.21.4.ff.2.bias": { + "min": -0.24794545769691467, + "max": 0.32831111550331116, + "mean": -0.000254548795055598, + "std": 0.04142748937010765, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.0.weight": { + "min": -0.286994069814682, + "max": 0.35009774565696716, + "mean": -2.1362816369219217e-06, + "std": 0.0242360457777977, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.22.1.g": { + "min": 0.1966284215450287, + "max": 0.7790648937225342, + "mean": 0.6702556014060974, + "std": 0.058683399111032486, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_q.weight": { + "min": -0.22847537696361542, + "max": 0.23085317015647888, + "mean": -1.998914376599714e-05, + "std": 0.04043750837445259, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_q.bias": { + "min": -0.2196640521287918, + "max": 0.2406841218471527, + "mean": 0.0007778428844176233, + "std": 0.05581061542034149, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_k.weight": { + "min": -0.21546684205532074, + "max": 0.22625623643398285, + "mean": -7.170689787017182e-05, + "std": 0.039373625069856644, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_k.bias": { + "min": -8.899069786071777, + "max": 9.061844825744629, + "mean": -0.0012379959225654602, + "std": 1.8475514650344849, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_v.weight": { + "min": -0.2690274119377136, + "max": 0.2585972249507904, + "mean": 4.365673885331489e-05, + "std": 0.038405876606702805, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_v.bias": { + "min": -0.05762965977191925, + "max": 0.057730112224817276, + "mean": 0.00035032647429034114, + "std": 0.014716975390911102, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.weight": { + "min": -0.2643204629421234, + "max": 0.28830888867378235, + "mean": -6.177595059853047e-05, + "std": 0.03907199949026108, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.bias": { + "min": -0.04382891580462456, + "max": 0.03727584704756737, + "mean": -8.995864482130855e-05, + "std": 0.013357071205973625, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.3.g": { + "min": 0.3394246995449066, + "max": 1.0903522968292236, + "mean": 0.8637199997901917, + "std": 0.06381762027740479, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.weight": { + "min": -0.4231264889240265, + "max": 0.41881492733955383, + "mean": 0.00031262467382475734, + "std": 0.04350043460726738, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.bias": { + "min": -0.21452167630195618, + "max": 0.1706276834011078, + "mean": -0.029481077566742897, + "std": 0.03191966935992241, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.22.4.ff.2.weight": { + "min": -0.5986213088035583, + "max": 0.5590333342552185, + "mean": -0.00015086884377524257, + "std": 0.05344516038894653, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.22.4.ff.2.bias": { + "min": -0.17835262417793274, + "max": 0.3764508068561554, + "mean": 0.0013586997520178556, + "std": 0.03730103746056557, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.0.weight": { + "min": -0.3942283093929291, + "max": 0.3688967823982239, + "mean": 3.6990095395594835e-05, + "std": 0.028617417439818382, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.23.1.g": { + "min": 0.2902565002441406, + "max": 0.8266182541847229, + "mean": 0.7055412530899048, + "std": 0.06787826120853424, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_q.weight": { + "min": -0.9262580275535583, + "max": 1.0264337062835693, + "mean": -2.6147403332288377e-05, + "std": 0.04762481153011322, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_q.bias": { + "min": -0.8780329823493958, + "max": 0.8147000074386597, + "mean": -0.0003064283519051969, + "std": 0.09549984335899353, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_k.weight": { + "min": -0.2694474458694458, + "max": 0.2405342310667038, + "mean": -2.2794924007030204e-05, + "std": 0.03895170986652374, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_k.bias": { + "min": -23.725736618041992, + "max": 22.834732055664062, + "mean": -0.09184679388999939, + "std": 4.068049430847168, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_v.weight": { + "min": -0.22741694748401642, + "max": 0.2447165697813034, + "mean": -2.5723496946739033e-05, + "std": 0.03863721713423729, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_v.bias": { + "min": -0.06024840846657753, + "max": 0.04582807794213295, + "mean": -0.00014292271225713193, + "std": 0.014692682772874832, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.weight": { + "min": -0.337954580783844, + "max": 0.3742024004459381, + "mean": 7.330418156925589e-06, + "std": 0.04081300273537636, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.bias": { + "min": -0.04640491306781769, + "max": 0.19541829824447632, + "mean": 0.00027370243333280087, + "std": 0.013559137471020222, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.3.g": { + "min": 0.3744112551212311, + "max": 1.1277745962142944, + "mean": 0.8900341987609863, + "std": 0.06396359950304031, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.weight": { + "min": -0.4476565718650818, + "max": 0.5421170592308044, + "mean": 2.477337693562731e-05, + "std": 0.04556567594408989, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.bias": { + "min": -0.2238994538784027, + "max": 0.0882241502404213, + "mean": -0.03201638162136078, + "std": 0.03775238245725632, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.23.4.ff.2.weight": { + "min": -0.7243073582649231, + "max": 0.6882233619689941, + "mean": 3.4276417864020914e-05, + "std": 0.05177783966064453, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.23.4.ff.2.bias": { + "min": -0.17440874874591827, + "max": 0.2182954102754593, + "mean": 4.099373472854495e-05, + "std": 0.0317707397043705, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.0.weight": { + "min": -0.33985471725463867, + "max": 0.3734351098537445, + "mean": 4.3027404899476096e-05, + "std": 0.03413975238800049, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.24.1.g": { + "min": 0.31756407022476196, + "max": 1.2844599485397339, + "mean": 0.6014232039451599, + "std": 0.08331646770238876, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_q.weight": { + "min": -0.2830894887447357, + "max": 0.260119765996933, + "mean": -2.825315732479794e-06, + "std": 0.03598077595233917, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_q.bias": { + "min": -0.23531799018383026, + "max": 0.20526045560836792, + "mean": 0.00023797567700967193, + "std": 0.05601158365607262, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_k.weight": { + "min": -0.43513408303260803, + "max": 0.324799120426178, + "mean": 2.434128509776201e-05, + "std": 0.03413143381476402, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_k.bias": { + "min": -5.539924144744873, + "max": 7.305825233459473, + "mean": -0.007350243628025055, + "std": 0.6986610889434814, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_v.weight": { + "min": -0.3433971107006073, + "max": 0.36268630623817444, + "mean": 0.00010339625441702083, + "std": 0.047828007489442825, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_v.bias": { + "min": -0.07370211184024811, + "max": 0.06033240258693695, + "mean": 0.0009340607211925089, + "std": 0.014942350797355175, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.weight": { + "min": -0.2555631995201111, + "max": 0.28619974851608276, + "mean": 4.566820280160755e-06, + "std": 0.04155479371547699, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.bias": { + "min": -0.05527225881814957, + "max": 0.0627666711807251, + "mean": 0.00013802105968352407, + "std": 0.0071632144972682, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.3.g": { + "min": 0.49384805560112, + "max": 1.2211062908172607, + "mean": 1.0134272575378418, + "std": 0.11744718253612518, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.weight": { + "min": -1.093487024307251, + "max": 1.046884298324585, + "mean": -4.944120883010328e-05, + "std": 0.052408553659915924, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.bias": { + "min": -0.22308824956417084, + "max": 0.17253872752189636, + "mean": -0.027238916605710983, + "std": 0.036325786262750626, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.24.4.ff.2.weight": { + "min": -0.8834213018417358, + "max": 0.921511173248291, + "mean": -0.00014601324801333249, + "std": 0.05328161269426346, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.24.4.ff.2.bias": { + "min": -0.17091798782348633, + "max": 0.3795103430747986, + "mean": 0.0033677970059216022, + "std": 0.039878927171230316, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.0.weight": { + "min": -0.7767993211746216, + "max": 0.7229223251342773, + "mean": 1.8964092305395752e-05, + "std": 0.04616083949804306, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.25.1.g": { + "min": 0.3385705351829529, + "max": 1.4257850646972656, + "mean": 0.948320209980011, + "std": 0.20674099028110504, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_q.weight": { + "min": -1.7456356287002563, + "max": 1.7042957544326782, + "mean": 0.00022721664572600275, + "std": 0.1586850881576538, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_q.bias": { + "min": -1.1983858346939087, + "max": 1.0988513231277466, + "mean": -0.009531477466225624, + "std": 0.20368283987045288, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_k.weight": { + "min": -0.4208756983280182, + "max": 0.4265652298927307, + "mean": 6.4577761804685e-05, + "std": 0.0480157844722271, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_k.bias": { + "min": -19.72553825378418, + "max": 19.520837783813477, + "mean": -0.2481747567653656, + "std": 4.772479057312012, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_v.weight": { + "min": -0.32345694303512573, + "max": 0.4378505349159241, + "mean": -1.1984889169980306e-05, + "std": 0.04616131633520126, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_v.bias": { + "min": -0.03403974324464798, + "max": 0.03704509884119034, + "mean": 0.0006423466256819665, + "std": 0.012919273227453232, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.weight": { + "min": -0.7029122710227966, + "max": 0.6650063395500183, + "mean": 4.321677261032164e-05, + "std": 0.05788154527544975, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.bias": { + "min": -0.07217518985271454, + "max": 0.06747341901063919, + "mean": -0.00013201506226323545, + "std": 0.012908914126455784, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.3.g": { + "min": 0.38026899099349976, + "max": 1.3915380239486694, + "mean": 1.0665700435638428, + "std": 0.2197078913450241, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.weight": { + "min": -0.6161525845527649, + "max": 0.7168518304824829, + "mean": 0.00011199730215594172, + "std": 0.058020394295454025, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.bias": { + "min": -0.21944588422775269, + "max": 0.22491848468780518, + "mean": 0.00621908949688077, + "std": 0.049715615808963776, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.25.4.ff.2.weight": { + "min": -0.6295903325080872, + "max": 0.8891246914863586, + "mean": 1.184111533802934e-05, + "std": 0.023527733981609344, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.25.4.ff.2.bias": { + "min": -0.5063257217407227, + "max": 0.4734645485877991, + "mean": -0.0030142185278236866, + "std": 0.06923094391822815, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.norm_out.g": { + "min": 0.537803590297699, + "max": 1.1795684099197388, + "mean": 0.7827014327049255, + "std": 0.09878505766391754, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.proj_out.weight": { + "min": -0.2665232717990875, + "max": 0.21241135895252228, + "mean": -0.00022294482914730906, + "std": 0.05399605259299278, + "sparsity": 0.0, + "shape": [ + 100, + 1024 + ] + }, + "transformer.proj_out.bias": { + "min": -0.23782978951931, + "max": 0.014834473840892315, + "mean": -0.04395260661840439, + "std": 0.034306950867176056, + "sparsity": 0.0, + "shape": [ + 100 + ] + } + } +} \ No newline at end of file