diff --git "a/model_analysis.json" "b/model_analysis.json" new file mode 100644--- /dev/null +++ "b/model_analysis.json" @@ -0,0 +1,4683 @@ +{ + "layer_types": { + "transformer": 391 + }, + "parameter_counts": { + "transformer.time_embed.time_mlp.0.weight": 262144, + "transformer.time_embed.time_mlp.0.bias": 1024, + "transformer.time_embed.time_mlp.2.weight": 1048576, + "transformer.time_embed.time_mlp.2.bias": 1024, + "transformer.text_embed.text_embed.weight": 254600, + "transformer.input_embed.proj.weight": 307200, + "transformer.input_embed.proj.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, + "transformer.layers.0.1.g": 1024, + "transformer.layers.0.2.to_q.weight": 1048576, + "transformer.layers.0.2.to_q.bias": 1024, + "transformer.layers.0.2.to_k.weight": 1048576, + "transformer.layers.0.2.to_k.bias": 1024, + "transformer.layers.0.2.to_v.weight": 1048576, + "transformer.layers.0.2.to_v.bias": 1024, + "transformer.layers.0.2.to_out.0.weight": 1048576, + "transformer.layers.0.2.to_out.0.bias": 1024, + "transformer.layers.0.3.g": 1024, + "transformer.layers.0.4.ff.0.0.weight": 4194304, + "transformer.layers.0.4.ff.0.0.bias": 4096, + "transformer.layers.0.4.ff.2.weight": 4194304, + "transformer.layers.0.4.ff.2.bias": 1024, + "transformer.layers.1.1.g": 1024, + "transformer.layers.1.2.to_q.weight": 1048576, + "transformer.layers.1.2.to_q.bias": 1024, + "transformer.layers.1.2.to_k.weight": 1048576, + "transformer.layers.1.2.to_k.bias": 1024, + "transformer.layers.1.2.to_v.weight": 1048576, + "transformer.layers.1.2.to_v.bias": 1024, + "transformer.layers.1.2.to_out.0.weight": 1048576, + "transformer.layers.1.2.to_out.0.bias": 1024, + "transformer.layers.1.3.g": 1024, + "transformer.layers.1.4.ff.0.0.weight": 4194304, + "transformer.layers.1.4.ff.0.0.bias": 4096, + "transformer.layers.1.4.ff.2.weight": 4194304, + "transformer.layers.1.4.ff.2.bias": 1024, + "transformer.layers.2.1.g": 1024, + "transformer.layers.2.2.to_q.weight": 1048576, + "transformer.layers.2.2.to_q.bias": 1024, + "transformer.layers.2.2.to_k.weight": 1048576, + "transformer.layers.2.2.to_k.bias": 1024, + "transformer.layers.2.2.to_v.weight": 1048576, + "transformer.layers.2.2.to_v.bias": 1024, + "transformer.layers.2.2.to_out.0.weight": 1048576, + "transformer.layers.2.2.to_out.0.bias": 1024, + "transformer.layers.2.3.g": 1024, + "transformer.layers.2.4.ff.0.0.weight": 4194304, + "transformer.layers.2.4.ff.0.0.bias": 4096, + "transformer.layers.2.4.ff.2.weight": 4194304, + "transformer.layers.2.4.ff.2.bias": 1024, + "transformer.layers.3.1.g": 1024, + "transformer.layers.3.2.to_q.weight": 1048576, + "transformer.layers.3.2.to_q.bias": 1024, + "transformer.layers.3.2.to_k.weight": 1048576, + "transformer.layers.3.2.to_k.bias": 1024, + "transformer.layers.3.2.to_v.weight": 1048576, + "transformer.layers.3.2.to_v.bias": 1024, + "transformer.layers.3.2.to_out.0.weight": 1048576, + "transformer.layers.3.2.to_out.0.bias": 1024, + "transformer.layers.3.3.g": 1024, + "transformer.layers.3.4.ff.0.0.weight": 4194304, + "transformer.layers.3.4.ff.0.0.bias": 4096, + "transformer.layers.3.4.ff.2.weight": 4194304, + "transformer.layers.3.4.ff.2.bias": 1024, + "transformer.layers.4.1.g": 1024, + "transformer.layers.4.2.to_q.weight": 1048576, + "transformer.layers.4.2.to_q.bias": 1024, + "transformer.layers.4.2.to_k.weight": 1048576, + "transformer.layers.4.2.to_k.bias": 1024, + "transformer.layers.4.2.to_v.weight": 1048576, + "transformer.layers.4.2.to_v.bias": 1024, + "transformer.layers.4.2.to_out.0.weight": 1048576, + "transformer.layers.4.2.to_out.0.bias": 1024, + "transformer.layers.4.3.g": 1024, + "transformer.layers.4.4.ff.0.0.weight": 4194304, + "transformer.layers.4.4.ff.0.0.bias": 4096, + "transformer.layers.4.4.ff.2.weight": 4194304, + "transformer.layers.4.4.ff.2.bias": 1024, + "transformer.layers.5.1.g": 1024, + "transformer.layers.5.2.to_q.weight": 1048576, + "transformer.layers.5.2.to_q.bias": 1024, + "transformer.layers.5.2.to_k.weight": 1048576, + "transformer.layers.5.2.to_k.bias": 1024, + "transformer.layers.5.2.to_v.weight": 1048576, + "transformer.layers.5.2.to_v.bias": 1024, + "transformer.layers.5.2.to_out.0.weight": 1048576, + "transformer.layers.5.2.to_out.0.bias": 1024, + "transformer.layers.5.3.g": 1024, + "transformer.layers.5.4.ff.0.0.weight": 4194304, + "transformer.layers.5.4.ff.0.0.bias": 4096, + "transformer.layers.5.4.ff.2.weight": 4194304, + "transformer.layers.5.4.ff.2.bias": 1024, + "transformer.layers.6.1.g": 1024, + "transformer.layers.6.2.to_q.weight": 1048576, + "transformer.layers.6.2.to_q.bias": 1024, + "transformer.layers.6.2.to_k.weight": 1048576, + "transformer.layers.6.2.to_k.bias": 1024, + "transformer.layers.6.2.to_v.weight": 1048576, + "transformer.layers.6.2.to_v.bias": 1024, + "transformer.layers.6.2.to_out.0.weight": 1048576, + "transformer.layers.6.2.to_out.0.bias": 1024, + "transformer.layers.6.3.g": 1024, + "transformer.layers.6.4.ff.0.0.weight": 4194304, + "transformer.layers.6.4.ff.0.0.bias": 4096, + "transformer.layers.6.4.ff.2.weight": 4194304, + "transformer.layers.6.4.ff.2.bias": 1024, + "transformer.layers.7.1.g": 1024, + "transformer.layers.7.2.to_q.weight": 1048576, + "transformer.layers.7.2.to_q.bias": 1024, + "transformer.layers.7.2.to_k.weight": 1048576, + "transformer.layers.7.2.to_k.bias": 1024, + "transformer.layers.7.2.to_v.weight": 1048576, + "transformer.layers.7.2.to_v.bias": 1024, + "transformer.layers.7.2.to_out.0.weight": 1048576, + "transformer.layers.7.2.to_out.0.bias": 1024, + "transformer.layers.7.3.g": 1024, + "transformer.layers.7.4.ff.0.0.weight": 4194304, + "transformer.layers.7.4.ff.0.0.bias": 4096, + "transformer.layers.7.4.ff.2.weight": 4194304, + "transformer.layers.7.4.ff.2.bias": 1024, + "transformer.layers.8.1.g": 1024, + "transformer.layers.8.2.to_q.weight": 1048576, + "transformer.layers.8.2.to_q.bias": 1024, + "transformer.layers.8.2.to_k.weight": 1048576, + "transformer.layers.8.2.to_k.bias": 1024, + "transformer.layers.8.2.to_v.weight": 1048576, + "transformer.layers.8.2.to_v.bias": 1024, + "transformer.layers.8.2.to_out.0.weight": 1048576, + "transformer.layers.8.2.to_out.0.bias": 1024, + "transformer.layers.8.3.g": 1024, + "transformer.layers.8.4.ff.0.0.weight": 4194304, + "transformer.layers.8.4.ff.0.0.bias": 4096, + "transformer.layers.8.4.ff.2.weight": 4194304, + "transformer.layers.8.4.ff.2.bias": 1024, + "transformer.layers.9.1.g": 1024, + "transformer.layers.9.2.to_q.weight": 1048576, + "transformer.layers.9.2.to_q.bias": 1024, + "transformer.layers.9.2.to_k.weight": 1048576, + "transformer.layers.9.2.to_k.bias": 1024, + "transformer.layers.9.2.to_v.weight": 1048576, + "transformer.layers.9.2.to_v.bias": 1024, + "transformer.layers.9.2.to_out.0.weight": 1048576, + "transformer.layers.9.2.to_out.0.bias": 1024, + "transformer.layers.9.3.g": 1024, + "transformer.layers.9.4.ff.0.0.weight": 4194304, + "transformer.layers.9.4.ff.0.0.bias": 4096, + "transformer.layers.9.4.ff.2.weight": 4194304, + "transformer.layers.9.4.ff.2.bias": 1024, + "transformer.layers.10.1.g": 1024, + "transformer.layers.10.2.to_q.weight": 1048576, + "transformer.layers.10.2.to_q.bias": 1024, + "transformer.layers.10.2.to_k.weight": 1048576, + "transformer.layers.10.2.to_k.bias": 1024, + "transformer.layers.10.2.to_v.weight": 1048576, + "transformer.layers.10.2.to_v.bias": 1024, + "transformer.layers.10.2.to_out.0.weight": 1048576, + "transformer.layers.10.2.to_out.0.bias": 1024, + "transformer.layers.10.3.g": 1024, + "transformer.layers.10.4.ff.0.0.weight": 4194304, + "transformer.layers.10.4.ff.0.0.bias": 4096, + "transformer.layers.10.4.ff.2.weight": 4194304, + "transformer.layers.10.4.ff.2.bias": 1024, + "transformer.layers.11.1.g": 1024, + "transformer.layers.11.2.to_q.weight": 1048576, + "transformer.layers.11.2.to_q.bias": 1024, + "transformer.layers.11.2.to_k.weight": 1048576, + "transformer.layers.11.2.to_k.bias": 1024, + "transformer.layers.11.2.to_v.weight": 1048576, + "transformer.layers.11.2.to_v.bias": 1024, + "transformer.layers.11.2.to_out.0.weight": 1048576, + "transformer.layers.11.2.to_out.0.bias": 1024, + "transformer.layers.11.3.g": 1024, + "transformer.layers.11.4.ff.0.0.weight": 4194304, + "transformer.layers.11.4.ff.0.0.bias": 4096, + "transformer.layers.11.4.ff.2.weight": 4194304, + "transformer.layers.11.4.ff.2.bias": 1024, + "transformer.layers.12.1.g": 1024, + "transformer.layers.12.2.to_q.weight": 1048576, + "transformer.layers.12.2.to_q.bias": 1024, + "transformer.layers.12.2.to_k.weight": 1048576, + "transformer.layers.12.2.to_k.bias": 1024, + "transformer.layers.12.2.to_v.weight": 1048576, + "transformer.layers.12.2.to_v.bias": 1024, + "transformer.layers.12.2.to_out.0.weight": 1048576, + "transformer.layers.12.2.to_out.0.bias": 1024, + "transformer.layers.12.3.g": 1024, + "transformer.layers.12.4.ff.0.0.weight": 4194304, + "transformer.layers.12.4.ff.0.0.bias": 4096, + "transformer.layers.12.4.ff.2.weight": 4194304, + "transformer.layers.12.4.ff.2.bias": 1024, + "transformer.layers.13.0.weight": 2097152, + "transformer.layers.13.1.g": 1024, + "transformer.layers.13.2.to_q.weight": 1048576, + "transformer.layers.13.2.to_q.bias": 1024, + "transformer.layers.13.2.to_k.weight": 1048576, + "transformer.layers.13.2.to_k.bias": 1024, + "transformer.layers.13.2.to_v.weight": 1048576, + "transformer.layers.13.2.to_v.bias": 1024, + "transformer.layers.13.2.to_out.0.weight": 1048576, + "transformer.layers.13.2.to_out.0.bias": 1024, + "transformer.layers.13.3.g": 1024, + "transformer.layers.13.4.ff.0.0.weight": 4194304, + "transformer.layers.13.4.ff.0.0.bias": 4096, + "transformer.layers.13.4.ff.2.weight": 4194304, + "transformer.layers.13.4.ff.2.bias": 1024, + "transformer.layers.14.0.weight": 2097152, + "transformer.layers.14.1.g": 1024, + "transformer.layers.14.2.to_q.weight": 1048576, + "transformer.layers.14.2.to_q.bias": 1024, + "transformer.layers.14.2.to_k.weight": 1048576, + "transformer.layers.14.2.to_k.bias": 1024, + "transformer.layers.14.2.to_v.weight": 1048576, + "transformer.layers.14.2.to_v.bias": 1024, + "transformer.layers.14.2.to_out.0.weight": 1048576, + "transformer.layers.14.2.to_out.0.bias": 1024, + "transformer.layers.14.3.g": 1024, + "transformer.layers.14.4.ff.0.0.weight": 4194304, + "transformer.layers.14.4.ff.0.0.bias": 4096, + "transformer.layers.14.4.ff.2.weight": 4194304, + "transformer.layers.14.4.ff.2.bias": 1024, + "transformer.layers.15.0.weight": 2097152, + "transformer.layers.15.1.g": 1024, + "transformer.layers.15.2.to_q.weight": 1048576, + "transformer.layers.15.2.to_q.bias": 1024, + "transformer.layers.15.2.to_k.weight": 1048576, + "transformer.layers.15.2.to_k.bias": 1024, + "transformer.layers.15.2.to_v.weight": 1048576, + "transformer.layers.15.2.to_v.bias": 1024, + "transformer.layers.15.2.to_out.0.weight": 1048576, + "transformer.layers.15.2.to_out.0.bias": 1024, + "transformer.layers.15.3.g": 1024, + "transformer.layers.15.4.ff.0.0.weight": 4194304, + "transformer.layers.15.4.ff.0.0.bias": 4096, + "transformer.layers.15.4.ff.2.weight": 4194304, + "transformer.layers.15.4.ff.2.bias": 1024, + "transformer.layers.16.0.weight": 2097152, + "transformer.layers.16.1.g": 1024, + "transformer.layers.16.2.to_q.weight": 1048576, + "transformer.layers.16.2.to_q.bias": 1024, + "transformer.layers.16.2.to_k.weight": 1048576, + "transformer.layers.16.2.to_k.bias": 1024, + "transformer.layers.16.2.to_v.weight": 1048576, + "transformer.layers.16.2.to_v.bias": 1024, + "transformer.layers.16.2.to_out.0.weight": 1048576, + "transformer.layers.16.2.to_out.0.bias": 1024, + "transformer.layers.16.3.g": 1024, + "transformer.layers.16.4.ff.0.0.weight": 4194304, + "transformer.layers.16.4.ff.0.0.bias": 4096, + "transformer.layers.16.4.ff.2.weight": 4194304, + "transformer.layers.16.4.ff.2.bias": 1024, + "transformer.layers.17.0.weight": 2097152, + "transformer.layers.17.1.g": 1024, + "transformer.layers.17.2.to_q.weight": 1048576, + "transformer.layers.17.2.to_q.bias": 1024, + "transformer.layers.17.2.to_k.weight": 1048576, + "transformer.layers.17.2.to_k.bias": 1024, + "transformer.layers.17.2.to_v.weight": 1048576, + "transformer.layers.17.2.to_v.bias": 1024, + "transformer.layers.17.2.to_out.0.weight": 1048576, + "transformer.layers.17.2.to_out.0.bias": 1024, + "transformer.layers.17.3.g": 1024, + "transformer.layers.17.4.ff.0.0.weight": 4194304, + "transformer.layers.17.4.ff.0.0.bias": 4096, + "transformer.layers.17.4.ff.2.weight": 4194304, + "transformer.layers.17.4.ff.2.bias": 1024, + "transformer.layers.18.0.weight": 2097152, + "transformer.layers.18.1.g": 1024, + "transformer.layers.18.2.to_q.weight": 1048576, + "transformer.layers.18.2.to_q.bias": 1024, + "transformer.layers.18.2.to_k.weight": 1048576, + "transformer.layers.18.2.to_k.bias": 1024, + "transformer.layers.18.2.to_v.weight": 1048576, + "transformer.layers.18.2.to_v.bias": 1024, + "transformer.layers.18.2.to_out.0.weight": 1048576, + "transformer.layers.18.2.to_out.0.bias": 1024, + "transformer.layers.18.3.g": 1024, + "transformer.layers.18.4.ff.0.0.weight": 4194304, + "transformer.layers.18.4.ff.0.0.bias": 4096, + "transformer.layers.18.4.ff.2.weight": 4194304, + "transformer.layers.18.4.ff.2.bias": 1024, + "transformer.layers.19.0.weight": 2097152, + "transformer.layers.19.1.g": 1024, + "transformer.layers.19.2.to_q.weight": 1048576, + "transformer.layers.19.2.to_q.bias": 1024, + "transformer.layers.19.2.to_k.weight": 1048576, + "transformer.layers.19.2.to_k.bias": 1024, + "transformer.layers.19.2.to_v.weight": 1048576, + "transformer.layers.19.2.to_v.bias": 1024, + "transformer.layers.19.2.to_out.0.weight": 1048576, + "transformer.layers.19.2.to_out.0.bias": 1024, + "transformer.layers.19.3.g": 1024, + "transformer.layers.19.4.ff.0.0.weight": 4194304, + "transformer.layers.19.4.ff.0.0.bias": 4096, + "transformer.layers.19.4.ff.2.weight": 4194304, + "transformer.layers.19.4.ff.2.bias": 1024, + "transformer.layers.20.0.weight": 2097152, + "transformer.layers.20.1.g": 1024, + "transformer.layers.20.2.to_q.weight": 1048576, + "transformer.layers.20.2.to_q.bias": 1024, + "transformer.layers.20.2.to_k.weight": 1048576, + "transformer.layers.20.2.to_k.bias": 1024, + "transformer.layers.20.2.to_v.weight": 1048576, + "transformer.layers.20.2.to_v.bias": 1024, + "transformer.layers.20.2.to_out.0.weight": 1048576, + "transformer.layers.20.2.to_out.0.bias": 1024, + "transformer.layers.20.3.g": 1024, + "transformer.layers.20.4.ff.0.0.weight": 4194304, + "transformer.layers.20.4.ff.0.0.bias": 4096, + "transformer.layers.20.4.ff.2.weight": 4194304, + "transformer.layers.20.4.ff.2.bias": 1024, + "transformer.layers.21.0.weight": 2097152, + "transformer.layers.21.1.g": 1024, + "transformer.layers.21.2.to_q.weight": 1048576, + "transformer.layers.21.2.to_q.bias": 1024, + "transformer.layers.21.2.to_k.weight": 1048576, + "transformer.layers.21.2.to_k.bias": 1024, + "transformer.layers.21.2.to_v.weight": 1048576, + "transformer.layers.21.2.to_v.bias": 1024, + "transformer.layers.21.2.to_out.0.weight": 1048576, + "transformer.layers.21.2.to_out.0.bias": 1024, + "transformer.layers.21.3.g": 1024, + "transformer.layers.21.4.ff.0.0.weight": 4194304, + "transformer.layers.21.4.ff.0.0.bias": 4096, + "transformer.layers.21.4.ff.2.weight": 4194304, + "transformer.layers.21.4.ff.2.bias": 1024, + "transformer.layers.22.0.weight": 2097152, + "transformer.layers.22.1.g": 1024, + "transformer.layers.22.2.to_q.weight": 1048576, + "transformer.layers.22.2.to_q.bias": 1024, + "transformer.layers.22.2.to_k.weight": 1048576, + "transformer.layers.22.2.to_k.bias": 1024, + "transformer.layers.22.2.to_v.weight": 1048576, + "transformer.layers.22.2.to_v.bias": 1024, + "transformer.layers.22.2.to_out.0.weight": 1048576, + "transformer.layers.22.2.to_out.0.bias": 1024, + "transformer.layers.22.3.g": 1024, + "transformer.layers.22.4.ff.0.0.weight": 4194304, + "transformer.layers.22.4.ff.0.0.bias": 4096, + "transformer.layers.22.4.ff.2.weight": 4194304, + "transformer.layers.22.4.ff.2.bias": 1024, + "transformer.layers.23.0.weight": 2097152, + "transformer.layers.23.1.g": 1024, + "transformer.layers.23.2.to_q.weight": 1048576, + "transformer.layers.23.2.to_q.bias": 1024, + "transformer.layers.23.2.to_k.weight": 1048576, + "transformer.layers.23.2.to_k.bias": 1024, + "transformer.layers.23.2.to_v.weight": 1048576, + "transformer.layers.23.2.to_v.bias": 1024, + "transformer.layers.23.2.to_out.0.weight": 1048576, + "transformer.layers.23.2.to_out.0.bias": 1024, + "transformer.layers.23.3.g": 1024, + "transformer.layers.23.4.ff.0.0.weight": 4194304, + "transformer.layers.23.4.ff.0.0.bias": 4096, + "transformer.layers.23.4.ff.2.weight": 4194304, + "transformer.layers.23.4.ff.2.bias": 1024, + "transformer.layers.24.0.weight": 2097152, + "transformer.layers.24.1.g": 1024, + "transformer.layers.24.2.to_q.weight": 1048576, + "transformer.layers.24.2.to_q.bias": 1024, + "transformer.layers.24.2.to_k.weight": 1048576, + "transformer.layers.24.2.to_k.bias": 1024, + "transformer.layers.24.2.to_v.weight": 1048576, + "transformer.layers.24.2.to_v.bias": 1024, + "transformer.layers.24.2.to_out.0.weight": 1048576, + "transformer.layers.24.2.to_out.0.bias": 1024, + "transformer.layers.24.3.g": 1024, + "transformer.layers.24.4.ff.0.0.weight": 4194304, + "transformer.layers.24.4.ff.0.0.bias": 4096, + "transformer.layers.24.4.ff.2.weight": 4194304, + "transformer.layers.24.4.ff.2.bias": 1024, + "transformer.layers.25.0.weight": 2097152, + "transformer.layers.25.1.g": 1024, + "transformer.layers.25.2.to_q.weight": 1048576, + "transformer.layers.25.2.to_q.bias": 1024, + "transformer.layers.25.2.to_k.weight": 1048576, + "transformer.layers.25.2.to_k.bias": 1024, + "transformer.layers.25.2.to_v.weight": 1048576, + "transformer.layers.25.2.to_v.bias": 1024, + "transformer.layers.25.2.to_out.0.weight": 1048576, + "transformer.layers.25.2.to_out.0.bias": 1024, + "transformer.layers.25.3.g": 1024, + "transformer.layers.25.4.ff.0.0.weight": 4194304, + "transformer.layers.25.4.ff.0.0.bias": 4096, + "transformer.layers.25.4.ff.2.weight": 4194304, + "transformer.layers.25.4.ff.2.bias": 1024, + "transformer.norm_out.g": 1024, + "transformer.proj_out.weight": 102400, + "transformer.proj_out.bias": 100 + }, + "important_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ], + "bottleneck_layers": [], + "recommendations": { + "focus_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ] + }, + "total_parameters": 391, + "total_elements": 360755948, + "param_ranges": { + "transformer.time_embed.time_mlp.0.weight": { + "min": -0.43014463782310486, + "max": 0.2980782687664032, + "mean": -0.002543725073337555, + "std": 0.04256265610456467, + "sparsity": 0.0, + "shape": [ + 1024, + 256 + ] + }, + "transformer.time_embed.time_mlp.0.bias": { + "min": -0.0628998726606369, + "max": 0.1072736531496048, + "mean": 0.0006290247547440231, + "std": 0.034041259437799454, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.weight": { + "min": -0.41270536184310913, + "max": 0.8369129300117493, + "mean": -0.00020170128846075386, + "std": 0.024111710488796234, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.bias": { + "min": -0.11501855403184891, + "max": 0.3208469748497009, + "mean": -0.0009418133413419127, + "std": 0.019536493346095085, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.text_embed.text_embed.weight": { + "min": -2.7886247634887695, + "max": 2.8676700592041016, + "mean": -0.0003673351602628827, + "std": 0.6154847145080566, + "sparsity": 0.0, + "shape": [ + 2546, + 100 + ] + }, + "transformer.input_embed.proj.weight": { + "min": -0.27889013290405273, + "max": 0.38151732087135315, + "mean": 0.0004236791573930532, + "std": 0.04274853691458702, + "sparsity": 0.0, + "shape": [ + 1024, + 300 + ] + }, + "transformer.input_embed.proj.bias": { + "min": -0.2219879925251007, + "max": 0.2091645449399948, + "mean": -0.004480332136154175, + "std": 0.040872007608413696, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { + "min": -0.42831405997276306, + "max": 0.47610175609588623, + "mean": 3.765870360439294e-06, + "std": 0.024510981515049934, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { + "min": -0.3244315981864929, + "max": 0.15647757053375244, + "mean": -0.046661682426929474, + "std": 0.05150889977812767, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { + "min": -0.4104415476322174, + "max": 0.3546721041202545, + "mean": -0.00013054227747488767, + "std": 0.02360478602349758, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { + "min": -0.22924789786338806, + "max": 0.2620227038860321, + "mean": -0.029105938971042633, + "std": 0.04928705468773842, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.1.g": { + "min": 0.2546031177043915, + "max": 0.8185229301452637, + "mean": 0.5252923965454102, + "std": 0.08049347996711731, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_q.weight": { + "min": -0.296941339969635, + "max": 0.2655627429485321, + "mean": -0.00042586910421960056, + "std": 0.03210259974002838, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_q.bias": { + "min": -0.09266690164804459, + "max": 0.12469176203012466, + "mean": 0.0006477286806330085, + "std": 0.025720255449414253, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_k.weight": { + "min": -0.2905982434749603, + "max": 0.28104421496391296, + "mean": -7.51031911931932e-05, + "std": 0.03093179315328598, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_k.bias": { + "min": -5.890929698944092, + "max": 5.805842876434326, + "mean": -0.009318170137703419, + "std": 1.2943130731582642, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_v.weight": { + "min": -0.42498156428337097, + "max": 0.3436700105667114, + "mean": 9.80497570708394e-05, + "std": 0.029953550547361374, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_v.bias": { + "min": -0.029002565890550613, + "max": 0.027599314227700233, + "mean": -0.0003237572673242539, + "std": 0.01257046777755022, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.weight": { + "min": -0.45393431186676025, + "max": 0.44807320833206177, + "mean": 2.3895699996501207e-05, + "std": 0.023853935301303864, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.bias": { + "min": -0.0885927751660347, + "max": 0.09089276939630508, + "mean": 0.0022863608319312334, + "std": 0.019503755494952202, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.3.g": { + "min": 0.2667909264564514, + "max": 1.0541586875915527, + "mean": 0.5309650301933289, + "std": 0.10402658581733704, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.weight": { + "min": -0.5743634104728699, + "max": 0.6081749796867371, + "mean": -0.0004296167171560228, + "std": 0.03860084339976311, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.bias": { + "min": -0.18247899413108826, + "max": 0.04562002047896385, + "mean": -0.029428046196699142, + "std": 0.04256246238946915, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.0.4.ff.2.weight": { + "min": -1.1666945219039917, + "max": 1.633580207824707, + "mean": 0.00032344620558433235, + "std": 0.027696726843714714, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.0.4.ff.2.bias": { + "min": -0.16206279397010803, + "max": 0.20534056425094604, + "mean": -0.02111881598830223, + "std": 0.027917111292481422, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.1.g": { + "min": 0.22404542565345764, + "max": 0.8422443866729736, + "mean": 0.4874877631664276, + "std": 0.07493799924850464, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_q.weight": { + "min": -0.255166620016098, + "max": 0.305690199136734, + "mean": -6.768415914848447e-06, + "std": 0.03347513824701309, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_q.bias": { + "min": -0.09524397552013397, + "max": 0.11034096777439117, + "mean": 6.5918720792979e-05, + "std": 0.026950189843773842, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_k.weight": { + "min": -0.29684391617774963, + "max": 0.295682817697525, + "mean": 5.335842433851212e-05, + "std": 0.03254625201225281, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_k.bias": { + "min": -5.156938552856445, + "max": 5.0772905349731445, + "mean": -0.014555896632373333, + "std": 1.1561553478240967, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_v.weight": { + "min": -0.3448536694049835, + "max": 0.34325698018074036, + "mean": 7.860038749640808e-05, + "std": 0.0300619974732399, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_v.bias": { + "min": -0.03601115196943283, + "max": 0.03331650421023369, + "mean": -0.0001408920797985047, + "std": 0.013034623116254807, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.weight": { + "min": -0.31532466411590576, + "max": 0.3747538924217224, + "mean": -2.0682646209024824e-05, + "std": 0.024059493094682693, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.bias": { + "min": -0.10526668280363083, + "max": 0.12198653072118759, + "mean": -0.001968209631741047, + "std": 0.0288400761783123, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.3.g": { + "min": 0.3114672601222992, + "max": 1.1185976266860962, + "mean": 0.6660763025283813, + "std": 0.09736555069684982, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.weight": { + "min": -0.872668981552124, + "max": 0.6275054216384888, + "mean": 0.0016755887772887945, + "std": 0.04743882641196251, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.bias": { + "min": -0.2710971236228943, + "max": 0.03426326811313629, + "mean": -0.0465819425880909, + "std": 0.04054969921708107, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.1.4.ff.2.weight": { + "min": -0.922234833240509, + "max": 0.9643772840499878, + "mean": 0.0010214494541287422, + "std": 0.04070669412612915, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.1.4.ff.2.bias": { + "min": -0.14429129660129547, + "max": 0.07484762370586395, + "mean": -0.00908473040908575, + "std": 0.025672495365142822, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.1.g": { + "min": 0.2402428686618805, + "max": 0.711609423160553, + "mean": 0.44710344076156616, + "std": 0.05906940996646881, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_q.weight": { + "min": -0.27207210659980774, + "max": 0.29753801226615906, + "mean": 9.350525942863896e-06, + "std": 0.035469669848680496, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_q.bias": { + "min": -0.11918215453624725, + "max": 0.1183757483959198, + "mean": 0.0007599537493661046, + "std": 0.027609599754214287, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_k.weight": { + "min": -0.2805421054363251, + "max": 0.2793859839439392, + "mean": -7.715764513704926e-05, + "std": 0.035099178552627563, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_k.bias": { + "min": -2.506035566329956, + "max": 2.518012046813965, + "mean": 0.026713747531175613, + "std": 0.5862806439399719, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_v.weight": { + "min": -0.22091814875602722, + "max": 0.27132153511047363, + "mean": 2.8913364076288417e-06, + "std": 0.0307327788323164, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_v.bias": { + "min": -0.03352135419845581, + "max": 0.03120853193104267, + "mean": 0.00011218251165701076, + "std": 0.012406233698129654, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.weight": { + "min": -0.2351619005203247, + "max": 0.23147742450237274, + "mean": 5.693763887393288e-05, + "std": 0.0256962887942791, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.bias": { + "min": -0.1356453150510788, + "max": 0.1271977722644806, + "mean": -0.005494291428476572, + "std": 0.0399438738822937, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.3.g": { + "min": 0.3544028699398041, + "max": 1.1697261333465576, + "mean": 0.7103750109672546, + "std": 0.10338432341814041, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.weight": { + "min": -0.6172477006912231, + "max": 0.5542004108428955, + "mean": 0.001160221640020609, + "std": 0.046119727194309235, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.bias": { + "min": -0.18825410306453705, + "max": 0.024966172873973846, + "mean": -0.03482227772474289, + "std": 0.02857418917119503, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.2.4.ff.2.weight": { + "min": -1.130850911140442, + "max": 0.9707417488098145, + "mean": 0.00035950675373896956, + "std": 0.042347487062215805, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.2.4.ff.2.bias": { + "min": -0.5971466898918152, + "max": 0.06270916759967804, + "mean": -0.004877141211181879, + "std": 0.02859053947031498, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.1.g": { + "min": 0.3752330243587494, + "max": 0.9386839866638184, + "mean": 0.5923458337783813, + "std": 0.06656130403280258, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_q.weight": { + "min": -0.3911682367324829, + "max": 0.3688437342643738, + "mean": 7.11916436557658e-05, + "std": 0.037188753485679626, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_q.bias": { + "min": -0.11875540018081665, + "max": 0.13628698885440826, + "mean": 0.0009287752327509224, + "std": 0.029227793216705322, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_k.weight": { + "min": -0.6185974478721619, + "max": 0.5083587169647217, + "mean": 1.5249221178237349e-05, + "std": 0.036442261189222336, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_k.bias": { + "min": -8.17552661895752, + "max": 8.776671409606934, + "mean": -0.1091664582490921, + "std": 1.6969325542449951, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_v.weight": { + "min": -0.27638494968414307, + "max": 0.23973813652992249, + "mean": 5.319732736097649e-05, + "std": 0.03261549770832062, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_v.bias": { + "min": -0.051992662250995636, + "max": 0.03946495056152344, + "mean": 9.150505502475426e-05, + "std": 0.012954742647707462, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.weight": { + "min": -0.23067787289619446, + "max": 0.23443163931369781, + "mean": -2.1657764591509476e-05, + "std": 0.029391853138804436, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.bias": { + "min": -0.20401200652122498, + "max": 0.10544212907552719, + "mean": -0.004023304674774408, + "std": 0.0326065756380558, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.3.g": { + "min": 0.33983615040779114, + "max": 1.0106816291809082, + "mean": 0.7006407380104065, + "std": 0.09645594656467438, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.weight": { + "min": -0.5642791390419006, + "max": 0.832179069519043, + "mean": 0.00041513514588586986, + "std": 0.042302437126636505, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.bias": { + "min": -0.21134838461875916, + "max": 0.030589817091822624, + "mean": -0.032172758132219315, + "std": 0.026476319879293442, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.3.4.ff.2.weight": { + "min": -0.7536408305168152, + "max": 0.717832088470459, + "mean": -9.409402991877869e-06, + "std": 0.03684220835566521, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.3.4.ff.2.bias": { + "min": -0.2631220519542694, + "max": 0.10570736974477768, + "mean": -0.003029324347153306, + "std": 0.028848078101873398, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.1.g": { + "min": 0.28446710109710693, + "max": 0.6937389373779297, + "mean": 0.49939653277397156, + "std": 0.04629269987344742, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_q.weight": { + "min": -0.27887189388275146, + "max": 0.23408503830432892, + "mean": -0.00011133110092487186, + "std": 0.03876320272684097, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_q.bias": { + "min": -0.15426576137542725, + "max": 0.1266399770975113, + "mean": -0.0022300498094409704, + "std": 0.0333842970430851, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_k.weight": { + "min": -0.41348376870155334, + "max": 0.6593844294548035, + "mean": -1.9785951735684648e-05, + "std": 0.039100244641304016, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_k.bias": { + "min": -4.232041358947754, + "max": 4.715827465057373, + "mean": -0.020488303154706955, + "std": 1.0068391561508179, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_v.weight": { + "min": -0.24481239914894104, + "max": 0.2074868232011795, + "mean": 4.380439349915832e-05, + "std": 0.03396626561880112, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_v.bias": { + "min": -0.03449943661689758, + "max": 0.044728994369506836, + "mean": -1.8020247807726264e-05, + "std": 0.012624197639524937, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.weight": { + "min": -0.20050014555454254, + "max": 0.20566238462924957, + "mean": -2.96780690405285e-05, + "std": 0.03102380409836769, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.bias": { + "min": -0.19964830577373505, + "max": 0.11326169967651367, + "mean": -0.00291792256757617, + "std": 0.03448895364999771, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.3.g": { + "min": 0.36708179116249084, + "max": 1.0548574924468994, + "mean": 0.6704699397087097, + "std": 0.06616173684597015, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.weight": { + "min": -0.397816002368927, + "max": 0.5021188855171204, + "mean": -3.856579860439524e-05, + "std": 0.041137274354696274, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.bias": { + "min": -0.12784262001514435, + "max": 0.02675941213965416, + "mean": -0.030531462281942368, + "std": 0.02184327319264412, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.4.4.ff.2.weight": { + "min": -0.4485797882080078, + "max": 0.43235480785369873, + "mean": 8.378910570172593e-05, + "std": 0.034896139055490494, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.4.4.ff.2.bias": { + "min": -0.26721277832984924, + "max": 0.07248232513666153, + "mean": -0.0011095060035586357, + "std": 0.023109637200832367, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.1.g": { + "min": 0.287344753742218, + "max": 0.6839542388916016, + "mean": 0.5244242548942566, + "std": 0.047291453927755356, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_q.weight": { + "min": -0.22201856970787048, + "max": 0.22311273217201233, + "mean": 1.5777890439494513e-05, + "std": 0.038952890783548355, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_q.bias": { + "min": -0.13627174496650696, + "max": 0.1090594157576561, + "mean": 0.00023713918926659971, + "std": 0.029215561226010323, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_k.weight": { + "min": -0.3747805953025818, + "max": 0.43678468465805054, + "mean": -9.57340671448037e-06, + "std": 0.03928905352950096, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_k.bias": { + "min": -3.840266227722168, + "max": 4.992228984832764, + "mean": 0.009751387871801853, + "std": 0.8444771766662598, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_v.weight": { + "min": -0.22314536571502686, + "max": 0.21986283361911774, + "mean": -2.0973857317585498e-07, + "std": 0.034413520246744156, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_v.bias": { + "min": -0.043581560254096985, + "max": 0.03578736633062363, + "mean": -0.00025875651044771075, + "std": 0.012076529674232006, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.weight": { + "min": -0.21286383271217346, + "max": 0.18843913078308105, + "mean": -1.6783853425295092e-05, + "std": 0.03154028207063675, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.bias": { + "min": -0.18049854040145874, + "max": 0.12063688784837723, + "mean": -0.0024107899516820908, + "std": 0.04124762490391731, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.3.g": { + "min": 0.4223836064338684, + "max": 0.9401367902755737, + "mean": 0.6626168489456177, + "std": 0.05654710531234741, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.weight": { + "min": -0.3711914122104645, + "max": 0.4754900634288788, + "mean": -8.231064566643909e-05, + "std": 0.04089626669883728, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.bias": { + "min": -0.2078404426574707, + "max": 0.02713177166879177, + "mean": -0.030231105163693428, + "std": 0.021318932995200157, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.5.4.ff.2.weight": { + "min": -0.3397354185581207, + "max": 0.7327741384506226, + "mean": 8.487920422339812e-05, + "std": 0.03477150574326515, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.5.4.ff.2.bias": { + "min": -0.23985552787780762, + "max": 0.050368692725896835, + "mean": -0.0011948456522077322, + "std": 0.02045026607811451, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.1.g": { + "min": 0.3060871660709381, + "max": 0.6523372530937195, + "mean": 0.5249941945075989, + "std": 0.04590437561273575, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_q.weight": { + "min": -0.30396750569343567, + "max": 0.2171545922756195, + "mean": 7.000747427809983e-05, + "std": 0.03949857875704765, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_q.bias": { + "min": -0.14921154081821442, + "max": 0.1312280148267746, + "mean": 0.00034826344926841557, + "std": 0.030445020645856857, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_k.weight": { + "min": -0.2569451630115509, + "max": 0.20191657543182373, + "mean": 3.105865471297875e-05, + "std": 0.03948771581053734, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_k.bias": { + "min": -2.332984685897827, + "max": 2.372544527053833, + "mean": -0.026222502812743187, + "std": 0.44942858815193176, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_v.weight": { + "min": -0.1888340413570404, + "max": 0.21024198830127716, + "mean": 3.719786036526784e-05, + "std": 0.03479824960231781, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_v.bias": { + "min": -0.031675707548856735, + "max": 0.035443130880594254, + "mean": -0.00020022659737151116, + "std": 0.012285580858588219, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.weight": { + "min": -0.18818390369415283, + "max": 0.17026524245738983, + "mean": -6.799849506933242e-05, + "std": 0.032174814492464066, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.bias": { + "min": -0.13918116688728333, + "max": 0.13709498941898346, + "mean": -0.0025172303430736065, + "std": 0.05128452926874161, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.3.g": { + "min": 0.4672186076641083, + "max": 0.9546743631362915, + "mean": 0.6688124537467957, + "std": 0.05250026285648346, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.weight": { + "min": -0.32424914836883545, + "max": 0.3096342980861664, + "mean": -1.5644909581169486e-06, + "std": 0.04095214605331421, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.bias": { + "min": -0.12461961060762405, + "max": 0.02530832216143608, + "mean": -0.03069971315562725, + "std": 0.019789544865489006, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.6.4.ff.2.weight": { + "min": -0.43944308161735535, + "max": 0.4446093440055847, + "mean": 9.534660784993321e-05, + "std": 0.035124197602272034, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.6.4.ff.2.bias": { + "min": -0.22425536811351776, + "max": 0.051573775708675385, + "mean": -0.001182063017040491, + "std": 0.018455415964126587, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.1.g": { + "min": 0.3393731713294983, + "max": 0.737841010093689, + "mean": 0.5586089491844177, + "std": 0.04119626432657242, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_q.weight": { + "min": -0.2723452150821686, + "max": 0.2782283425331116, + "mean": 1.991558019653894e-05, + "std": 0.04106247052550316, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_q.bias": { + "min": -0.13683027029037476, + "max": 0.1396752893924713, + "mean": 0.0004885591333732009, + "std": 0.026614630594849586, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_k.weight": { + "min": -0.49012690782546997, + "max": 0.35547417402267456, + "mean": 8.882825204636902e-05, + "std": 0.04070047289133072, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_k.bias": { + "min": -2.2938547134399414, + "max": 1.7426533699035645, + "mean": -0.021057037636637688, + "std": 0.49975258111953735, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_v.weight": { + "min": -0.21735826134681702, + "max": 0.19773884117603302, + "mean": -4.0639675717102364e-05, + "std": 0.03423747047781944, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_v.bias": { + "min": -0.041265569627285004, + "max": 0.03861430287361145, + "mean": -0.00014519633259624243, + "std": 0.012876993976533413, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.weight": { + "min": -0.17728237807750702, + "max": 0.18350861966609955, + "mean": 4.760306910611689e-05, + "std": 0.031560394912958145, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.bias": { + "min": -0.1796274185180664, + "max": 0.18359197676181793, + "mean": -0.0022178757935762405, + "std": 0.05480958893895149, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.3.g": { + "min": 0.47430306673049927, + "max": 1.0235347747802734, + "mean": 0.645234227180481, + "std": 0.05006485432386398, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.weight": { + "min": -0.2717384696006775, + "max": 0.3092706799507141, + "mean": 0.00011245072528254241, + "std": 0.04068849980831146, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.bias": { + "min": -0.10565188527107239, + "max": 0.026852920651435852, + "mean": -0.029502389952540398, + "std": 0.017905903980135918, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.7.4.ff.2.weight": { + "min": -0.33881059288978577, + "max": 0.3287763297557831, + "mean": 5.716992018278688e-05, + "std": 0.03441813588142395, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.7.4.ff.2.bias": { + "min": -0.1814029961824417, + "max": 0.04198184236884117, + "mean": -0.0010715797543525696, + "std": 0.017202889546751976, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.1.g": { + "min": 0.32546839118003845, + "max": 0.6852879524230957, + "mean": 0.5111152529716492, + "std": 0.036710962653160095, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_q.weight": { + "min": -0.23360855877399445, + "max": 0.22551532089710236, + "mean": -3.5930093872593716e-05, + "std": 0.039181701838970184, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_q.bias": { + "min": -0.11516069620847702, + "max": 0.13141536712646484, + "mean": 0.00015141721814870834, + "std": 0.02916705049574375, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_k.weight": { + "min": -0.3523465394973755, + "max": 0.2849816083908081, + "mean": 7.249595455505187e-06, + "std": 0.039250195026397705, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_k.bias": { + "min": -4.126643180847168, + "max": 3.538667678833008, + "mean": -0.011556778103113174, + "std": 0.681910514831543, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_v.weight": { + "min": -0.2112656831741333, + "max": 0.20894697308540344, + "mean": 3.47470777342096e-05, + "std": 0.03448949381709099, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_v.bias": { + "min": -0.03565378487110138, + "max": 0.0480014868080616, + "mean": 0.0007942374795675278, + "std": 0.012850471772253513, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.weight": { + "min": -0.21031072735786438, + "max": 0.19297289848327637, + "mean": -1.28749752548174e-06, + "std": 0.03169998526573181, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.bias": { + "min": -0.18637274205684662, + "max": 0.17692941427230835, + "mean": -0.0028488910757005215, + "std": 0.05860321223735809, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.3.g": { + "min": 0.47467249631881714, + "max": 1.0397725105285645, + "mean": 0.6513394117355347, + "std": 0.049329087138175964, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.weight": { + "min": -0.248422771692276, + "max": 0.32902756333351135, + "mean": 0.00018066739721689373, + "std": 0.04057690501213074, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.bias": { + "min": -0.12427264451980591, + "max": 0.024594629183411598, + "mean": -0.030488643795251846, + "std": 0.017578164115548134, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.8.4.ff.2.weight": { + "min": -0.4205840826034546, + "max": 0.4813268184661865, + "mean": 2.1296489194355672e-06, + "std": 0.035403117537498474, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.8.4.ff.2.bias": { + "min": -0.15161579847335815, + "max": 0.043303120881319046, + "mean": 3.9640130125917494e-05, + "std": 0.014866231009364128, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.1.g": { + "min": 0.3155551552772522, + "max": 0.6806549429893494, + "mean": 0.5528165102005005, + "std": 0.04051704332232475, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_q.weight": { + "min": -0.2062118798494339, + "max": 0.21964126825332642, + "mean": 3.0860344850225374e-05, + "std": 0.038303423672914505, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_q.bias": { + "min": -0.1376407891511917, + "max": 0.11259414255619049, + "mean": 2.069001493509859e-05, + "std": 0.02579990215599537, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_k.weight": { + "min": -0.40213435888290405, + "max": 0.3705216944217682, + "mean": 2.6252397219650447e-05, + "std": 0.03818526491522789, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_k.bias": { + "min": -3.765413761138916, + "max": 2.86456298828125, + "mean": 0.0011342763900756836, + "std": 0.5163310766220093, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_v.weight": { + "min": -0.20278441905975342, + "max": 0.1972842514514923, + "mean": 2.9531776817748323e-05, + "std": 0.034300558269023895, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_v.bias": { + "min": -0.05089922249317169, + "max": 0.03997639939188957, + "mean": -0.00041936602792702615, + "std": 0.013420597650110722, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.weight": { + "min": -0.19621425867080688, + "max": 0.20147208869457245, + "mean": -1.232856902788626e-05, + "std": 0.0318082757294178, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.bias": { + "min": -0.19283677637577057, + "max": 0.1948237270116806, + "mean": -0.002969849156215787, + "std": 0.06253352016210556, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.3.g": { + "min": 0.34950727224349976, + "max": 1.081899642944336, + "mean": 0.6671000123023987, + "std": 0.05490493029356003, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.weight": { + "min": -0.22493921220302582, + "max": 0.2511034309864044, + "mean": 0.00035913087776862085, + "std": 0.04076593369245529, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.bias": { + "min": -0.09088904410600662, + "max": 0.04371574521064758, + "mean": -0.030075963586568832, + "std": 0.01758558303117752, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.9.4.ff.2.weight": { + "min": -0.35314324498176575, + "max": 0.303651362657547, + "mean": -4.348178117652424e-05, + "std": 0.03712818771600723, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.9.4.ff.2.bias": { + "min": -0.16180230677127838, + "max": 0.0634349063038826, + "mean": -8.249300299212337e-05, + "std": 0.019394585862755775, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.1.g": { + "min": 0.34883353114128113, + "max": 0.7206243872642517, + "mean": 0.5422865748405457, + "std": 0.03884800896048546, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_q.weight": { + "min": -0.21920670568943024, + "max": 0.22291362285614014, + "mean": -1.1165884643560275e-05, + "std": 0.039236169308423996, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_q.bias": { + "min": -0.11826413869857788, + "max": 0.17058128118515015, + "mean": 0.0002835137420333922, + "std": 0.02510087564587593, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_k.weight": { + "min": -0.2464587390422821, + "max": 0.3006129264831543, + "mean": -3.6620229366235435e-05, + "std": 0.03893572464585304, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_k.bias": { + "min": -3.4999661445617676, + "max": 3.709076166152954, + "mean": 0.015840880572795868, + "std": 0.7814859747886658, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_v.weight": { + "min": -0.2185182124376297, + "max": 0.23746132850646973, + "mean": -1.3619630408356898e-05, + "std": 0.03630794584751129, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_v.bias": { + "min": -0.04712348431348801, + "max": 0.05133059248328209, + "mean": 0.00048102246364578605, + "std": 0.01351132895797491, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.weight": { + "min": -0.21373434364795685, + "max": 0.2173190861940384, + "mean": 5.6508688430767506e-05, + "std": 0.033619917929172516, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.bias": { + "min": -0.21108141541481018, + "max": 0.23115544021129608, + "mean": -0.005106039810925722, + "std": 0.06184696406126022, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.3.g": { + "min": 0.36205485463142395, + "max": 1.099104642868042, + "mean": 0.6992122530937195, + "std": 0.05326760187745094, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.weight": { + "min": -0.23436696827411652, + "max": 0.24465103447437286, + "mean": 0.00046349133481271565, + "std": 0.04127480834722519, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.bias": { + "min": -0.09793505817651749, + "max": 0.0681939497590065, + "mean": -0.03142588585615158, + "std": 0.0180974081158638, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.10.4.ff.2.weight": { + "min": -0.3012528717517853, + "max": 0.3511028289794922, + "mean": -8.162344602169469e-05, + "std": 0.04028059542179108, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.10.4.ff.2.bias": { + "min": -0.15210135281085968, + "max": 0.14944450557231903, + "mean": 0.00025588623248040676, + "std": 0.023021480068564415, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.1.g": { + "min": 1.0, + "max": 1.0, + "mean": 1.0, + "std": 0.0, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_q.weight": { + "min": -0.031249936670064926, + "max": 0.031249839812517166, + "mean": -1.929272730194498e-05, + "std": 0.01804409734904766, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_q.bias": { + "min": -0.031226642429828644, + "max": 0.03100142627954483, + "mean": -0.0010842883493751287, + "std": 0.01795371063053608, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_k.weight": { + "min": -0.031249966472387314, + "max": 0.031249895691871643, + "mean": 3.5441080399323255e-06, + "std": 0.018044503405690193, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_k.bias": { + "min": -0.031156372278928757, + "max": 0.031184475868940353, + "mean": 0.0003338930255267769, + "std": 0.018065759912133217, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.3.g": { + "min": 1.0, + "max": 1.0, + "mean": 1.0, + "std": 0.0, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.weight": { + "min": -0.031249985098838806, + "max": 0.031249992549419403, + "mean": -8.393528332817368e-06, + "std": 0.018043218180537224, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.bias": { + "min": -0.03124961629509926, + "max": 0.031239181756973267, + "mean": 0.00015365774743258953, + "std": 0.017994258552789688, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.11.4.ff.2.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.11.4.ff.2.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.1.g": { + "min": 0.3829966187477112, + "max": 0.718121349811554, + "mean": 0.5806018114089966, + "std": 0.03862323611974716, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_q.weight": { + "min": -0.23782959580421448, + "max": 0.1963561624288559, + "mean": 2.662676888576243e-05, + "std": 0.03746971860527992, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_q.bias": { + "min": -0.11848776042461395, + "max": 0.1658152937889099, + "mean": 0.0009899433935061097, + "std": 0.027532605454325676, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_k.weight": { + "min": -0.2458610236644745, + "max": 0.5000857710838318, + "mean": -5.0437993195373565e-05, + "std": 0.037627607583999634, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_k.bias": { + "min": -3.936108350753784, + "max": 3.7635273933410645, + "mean": -0.003571532666683197, + "std": 0.6807447671890259, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_v.weight": { + "min": -0.2272127866744995, + "max": 0.25125452876091003, + "mean": -1.1669096238620114e-05, + "std": 0.03743912652134895, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_v.bias": { + "min": -0.07160257548093796, + "max": 0.08056868612766266, + "mean": -0.0005193912656977773, + "std": 0.015654100105166435, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.weight": { + "min": -0.22808189690113068, + "max": 0.25764524936676025, + "mean": -2.8624446713365614e-05, + "std": 0.03542578965425491, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.bias": { + "min": -0.2000962197780609, + "max": 0.21490387618541718, + "mean": -0.0055319443345069885, + "std": 0.0682973712682724, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.3.g": { + "min": 0.4052681028842926, + "max": 1.1870543956756592, + "mean": 0.7378469705581665, + "std": 0.05485502630472183, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.weight": { + "min": -0.22090063989162445, + "max": 0.24591459333896637, + "mean": 0.0005211709067225456, + "std": 0.041342560201883316, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.bias": { + "min": -0.10329551994800568, + "max": 0.02418467588722706, + "mean": -0.03265417367219925, + "std": 0.0188569538295269, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.12.4.ff.2.weight": { + "min": -0.44879788160324097, + "max": 0.421781986951828, + "mean": -0.0004324353067204356, + "std": 0.046903904527425766, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.12.4.ff.2.bias": { + "min": -0.25108596682548523, + "max": 0.46939900517463684, + "mean": 0.003194585908204317, + "std": 0.04450792446732521, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.0.weight": { + "min": -0.3169757127761841, + "max": 0.33316904306411743, + "mean": -2.5288816686952487e-05, + "std": 0.021290883421897888, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.13.1.g": { + "min": 0.3246179223060608, + "max": 0.6840593218803406, + "mean": 0.5709414482116699, + "std": 0.04453985393047333, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_q.weight": { + "min": -0.16449199616909027, + "max": 0.17385058104991913, + "mean": -4.8540678108111024e-05, + "std": 0.033184703439474106, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_q.bias": { + "min": -0.18657186627388, + "max": 0.14269262552261353, + "mean": 3.6818586522713304e-05, + "std": 0.029670175164937973, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_k.weight": { + "min": -0.3801823556423187, + "max": 0.24568894505500793, + "mean": -1.0017956810770556e-05, + "std": 0.0327659472823143, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_k.bias": { + "min": -3.6502017974853516, + "max": 3.2850754261016846, + "mean": -0.014260413125157356, + "std": 0.9845133423805237, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_v.weight": { + "min": -0.2349099963903427, + "max": 0.2473423033952713, + "mean": -1.7784630472306162e-05, + "std": 0.04170290008187294, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_v.bias": { + "min": -0.07268015295267105, + "max": 0.1542970985174179, + "mean": 0.000663664482999593, + "std": 0.02515619620680809, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.weight": { + "min": -0.2664458751678467, + "max": 0.2483866959810257, + "mean": -1.5342582628363743e-05, + "std": 0.040143273770809174, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.bias": { + "min": -0.18931904435157776, + "max": 0.19443899393081665, + "mean": -0.0012288358993828297, + "std": 0.06666287034749985, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.3.g": { + "min": 0.32919859886169434, + "max": 0.997564435005188, + "mean": 0.7190552949905396, + "std": 0.051983967423439026, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.weight": { + "min": -0.2313733994960785, + "max": 0.24550800025463104, + "mean": 0.0001826301304390654, + "std": 0.04090628772974014, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.bias": { + "min": -0.11402574181556702, + "max": 0.018650896847248077, + "mean": -0.0424647182226181, + "std": 0.0188254714012146, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.13.4.ff.2.weight": { + "min": -0.3894314467906952, + "max": 0.4067791998386383, + "mean": -2.184629556722939e-05, + "std": 0.048540692776441574, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.13.4.ff.2.bias": { + "min": -0.692162811756134, + "max": 0.4120035469532013, + "mean": 0.000852768833283335, + "std": 0.060242246836423874, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.0.weight": { + "min": 0.0, + "max": 1.0, + "mean": 0.00048828125, + "std": 0.0220916960388422, + "sparsity": 0.99951171875, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.14.1.g": { + "min": 1.0, + "max": 1.0, + "mean": 1.0, + "std": 0.0, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_q.weight": { + "min": -0.031249970197677612, + "max": 0.031249817460775375, + "mean": -2.1022660803282633e-05, + "std": 0.018035436049103737, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_q.bias": { + "min": -0.03122086077928543, + "max": 0.031233571469783783, + "mean": -0.0006771883927285671, + "std": 0.01782997138798237, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_k.weight": { + "min": -0.03124987706542015, + "max": 0.031249921768903732, + "mean": -8.839060683385469e-06, + "std": 0.01803446188569069, + "sparsity": 9.5367431640625e-07, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_k.bias": { + "min": -0.031232360750436783, + "max": 0.031245984137058258, + "mean": -0.0007298353011719882, + "std": 0.017944591119885445, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.3.g": { + "min": 1.0, + "max": 1.0, + "mean": 1.0, + "std": 0.0, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.weight": { + "min": -0.03125, + "max": 0.031249988824129105, + "mean": 3.5917482819058932e-06, + "std": 0.018040824681520462, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.bias": { + "min": -0.031234480440616608, + "max": 0.031246982514858246, + "mean": 0.0001957040512934327, + "std": 0.018076537176966667, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.14.4.ff.2.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.14.4.ff.2.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.0.weight": { + "min": -0.23450319468975067, + "max": 0.2724616229534149, + "mean": 6.94814343660255e-06, + "std": 0.01881224475800991, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.15.1.g": { + "min": 0.32128843665122986, + "max": 0.6922435760498047, + "mean": 0.5815606117248535, + "std": 0.045744746923446655, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_q.weight": { + "min": -0.18168264627456665, + "max": 0.1974717229604721, + "mean": -1.1712746527337003e-05, + "std": 0.03318728506565094, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_q.bias": { + "min": -0.16043128073215485, + "max": 0.1292782723903656, + "mean": -0.0010662535205483437, + "std": 0.034117527306079865, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_k.weight": { + "min": -0.3318951725959778, + "max": 0.31116846203804016, + "mean": -1.0326406481908634e-05, + "std": 0.03223801404237747, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_k.bias": { + "min": -7.791203022003174, + "max": 8.74953842163086, + "mean": 0.09337067604064941, + "std": 1.61784029006958, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_v.weight": { + "min": -0.23363685607910156, + "max": 0.24183623492717743, + "mean": 4.133233960601501e-05, + "std": 0.0408620610833168, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_v.bias": { + "min": -0.07588791847229004, + "max": 0.0656837597489357, + "mean": 0.00047856790479272604, + "std": 0.01940334029495716, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.weight": { + "min": -0.2455652505159378, + "max": 0.2337566763162613, + "mean": -2.8880322133773006e-06, + "std": 0.03943672403693199, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.bias": { + "min": -0.16261433064937592, + "max": 0.1605682373046875, + "mean": 0.0016338212881237268, + "std": 0.06525633484125137, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.3.g": { + "min": 0.5568146705627441, + "max": 0.9421050548553467, + "mean": 0.7127699851989746, + "std": 0.03979077190160751, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.weight": { + "min": -0.22831875085830688, + "max": 0.2548784911632538, + "mean": -4.536488631856628e-05, + "std": 0.040581412613391876, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.bias": { + "min": -0.13459284603595734, + "max": 0.02228192612528801, + "mean": -0.04134010896086693, + "std": 0.018355557695031166, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.15.4.ff.2.weight": { + "min": -0.4211236536502838, + "max": 0.3922184407711029, + "mean": -4.3558175093494356e-06, + "std": 0.04779110848903656, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.15.4.ff.2.bias": { + "min": -0.6065256595611572, + "max": 0.6503778696060181, + "mean": 0.0015810506884008646, + "std": 0.05679204687476158, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.0.weight": { + "min": -0.2516687214374542, + "max": 0.3206498920917511, + "mean": -6.05763898420264e-06, + "std": 0.0196156594902277, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.16.1.g": { + "min": 0.35995498299598694, + "max": 0.6810278296470642, + "mean": 0.5706292986869812, + "std": 0.042767371982336044, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_q.weight": { + "min": -0.22037938237190247, + "max": 0.1769036501646042, + "mean": -3.4671174944378436e-05, + "std": 0.03430242836475372, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_q.bias": { + "min": -0.16339237987995148, + "max": 0.23269455134868622, + "mean": 0.00036311167059466243, + "std": 0.03283863142132759, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_k.weight": { + "min": -0.2634328007698059, + "max": 0.23954781889915466, + "mean": -5.2383129514055327e-05, + "std": 0.03390158340334892, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_k.bias": { + "min": -4.847443580627441, + "max": 5.083292484283447, + "mean": 0.043835077434778214, + "std": 1.227935552597046, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_v.weight": { + "min": -0.24653136730194092, + "max": 0.25027644634246826, + "mean": 7.213905337266624e-05, + "std": 0.04399324953556061, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_v.bias": { + "min": -0.06254159659147263, + "max": 0.054444003850221634, + "mean": 0.000650427769869566, + "std": 0.017183585092425346, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.weight": { + "min": -0.28619009256362915, + "max": 0.2717132866382599, + "mean": -4.993668699171394e-05, + "std": 0.04299163073301315, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.bias": { + "min": -0.16040603816509247, + "max": 0.17025713622570038, + "mean": -0.0028844610787928104, + "std": 0.05926158279180527, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.3.g": { + "min": 0.5196964740753174, + "max": 0.9310137629508972, + "mean": 0.7133955955505371, + "std": 0.03807961940765381, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.weight": { + "min": -0.23809659481048584, + "max": 0.24939550459384918, + "mean": 0.00046480150194838643, + "std": 0.04046152904629707, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.bias": { + "min": -0.14403879642486572, + "max": 0.041449662297964096, + "mean": -0.03967723995447159, + "std": 0.02051496133208275, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.16.4.ff.2.weight": { + "min": -0.5321223735809326, + "max": 0.582199215888977, + "mean": 5.944145414105151e-06, + "std": 0.04886837303638458, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.16.4.ff.2.bias": { + "min": -0.5183588862419128, + "max": 0.49274152517318726, + "mean": 0.0023598431143909693, + "std": 0.053401440382003784, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.0.weight": { + "min": -0.27355626225471497, + "max": 0.31514689326286316, + "mean": 1.8169575923820958e-06, + "std": 0.020052826032042503, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.17.1.g": { + "min": 0.36634165048599243, + "max": 0.7102516293525696, + "mean": 0.5930806994438171, + "std": 0.04571138322353363, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_q.weight": { + "min": -0.21087931096553802, + "max": 0.1994456797838211, + "mean": 3.073544939979911e-05, + "std": 0.034868594259023666, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_q.bias": { + "min": -0.1869715005159378, + "max": 0.20369935035705566, + "mean": 0.0009553421987220645, + "std": 0.0314984992146492, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_k.weight": { + "min": -0.28932973742485046, + "max": 0.33943668007850647, + "mean": -4.7415782319149e-05, + "std": 0.034589733928442, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_k.bias": { + "min": -3.8712191581726074, + "max": 3.3820998668670654, + "mean": 0.014444351196289062, + "std": 0.8576834797859192, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_v.weight": { + "min": -0.2242382913827896, + "max": 0.24965918064117432, + "mean": -4.01433771912707e-06, + "std": 0.04223589971661568, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_v.bias": { + "min": -0.05498581379652023, + "max": 0.046769097447395325, + "mean": -1.842428173404187e-05, + "std": 0.015840334817767143, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.weight": { + "min": -0.2928566634654999, + "max": 0.29091376066207886, + "mean": -7.361577445408329e-06, + "std": 0.04195090010762215, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.bias": { + "min": -0.12467863410711288, + "max": 0.25901108980178833, + "mean": -0.003233879804611206, + "std": 0.05313729867339134, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.3.g": { + "min": 0.4561373293399811, + "max": 0.8428487777709961, + "mean": 0.7054461240768433, + "std": 0.03489769622683525, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.weight": { + "min": -0.5113534331321716, + "max": 0.3484715223312378, + "mean": 0.00034262536792084575, + "std": 0.04020649194717407, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.bias": { + "min": -0.18678922951221466, + "max": 0.03952203318476677, + "mean": -0.03937358409166336, + "std": 0.02131999284029007, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.17.4.ff.2.weight": { + "min": -0.5436691045761108, + "max": 0.5556817054748535, + "mean": -7.178769010351971e-05, + "std": 0.05074293538928032, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.17.4.ff.2.bias": { + "min": -0.5110356211662292, + "max": 0.6633175015449524, + "mean": 0.002444919664412737, + "std": 0.04948664829134941, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.0.weight": { + "min": -0.3323739171028137, + "max": 0.2654549777507782, + "mean": 3.673958872241201e-06, + "std": 0.019390413537621498, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.18.1.g": { + "min": 0.32227811217308044, + "max": 0.7648001313209534, + "mean": 0.6509190201759338, + "std": 0.04508262872695923, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_q.weight": { + "min": -0.24930793046951294, + "max": 0.21936655044555664, + "mean": -2.4470787138852756e-06, + "std": 0.036502547562122345, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_q.bias": { + "min": -0.32666686177253723, + "max": 0.2868551015853882, + "mean": -0.0006774846115149558, + "std": 0.03851696848869324, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_k.weight": { + "min": -0.3097042739391327, + "max": 0.3694048821926117, + "mean": 6.485832273028791e-05, + "std": 0.03624315932393074, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_k.bias": { + "min": -4.71013069152832, + "max": 5.798623085021973, + "mean": 0.03792855516076088, + "std": 1.41161048412323, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_v.weight": { + "min": -0.22137394547462463, + "max": 0.20554855465888977, + "mean": -7.500727224396542e-05, + "std": 0.042491503059864044, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_v.bias": { + "min": -0.07746972888708115, + "max": 0.05126894265413284, + "mean": -0.0009250898147001863, + "std": 0.016401393339037895, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.weight": { + "min": -0.33084556460380554, + "max": 0.32904890179634094, + "mean": -4.916641955787782e-06, + "std": 0.042798250913619995, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.bias": { + "min": -0.2845572233200073, + "max": 0.11143017560243607, + "mean": -0.0012043914757668972, + "std": 0.04699280112981796, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.3.g": { + "min": 0.48666608333587646, + "max": 0.885034441947937, + "mean": 0.7373895049095154, + "std": 0.03794779255986214, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.weight": { + "min": -0.3611343502998352, + "max": 0.27392831444740295, + "mean": 5.120676723890938e-05, + "std": 0.04065323248505592, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.bias": { + "min": -0.2472306787967682, + "max": 0.046531591564416885, + "mean": -0.03925502672791481, + "std": 0.023223698139190674, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.18.4.ff.2.weight": { + "min": -0.62546706199646, + "max": 0.596234142780304, + "mean": -6.186794053064659e-05, + "std": 0.0531260222196579, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.18.4.ff.2.bias": { + "min": -0.7086492776870728, + "max": 0.2654070556163788, + "mean": 0.0009191531571559608, + "std": 0.05119417607784271, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.0.weight": { + "min": -0.34331265091896057, + "max": 0.30340248346328735, + "mean": 2.337387741135899e-07, + "std": 0.019139692187309265, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.19.1.g": { + "min": 0.3500247001647949, + "max": 0.7813002467155457, + "mean": 0.6387312412261963, + "std": 0.048984214663505554, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_q.weight": { + "min": -0.20559599995613098, + "max": 0.20657846331596375, + "mean": -5.995871470076963e-05, + "std": 0.03769858554005623, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_q.bias": { + "min": -0.25827330350875854, + "max": 0.26797717809677124, + "mean": -0.00040583324152976274, + "std": 0.04458905756473541, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_k.weight": { + "min": -0.35375165939331055, + "max": 0.32213273644447327, + "mean": -7.3352221079403535e-06, + "std": 0.03720685839653015, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_k.bias": { + "min": -5.253459930419922, + "max": 4.198183536529541, + "mean": -0.0263908039778471, + "std": 1.0056793689727783, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_v.weight": { + "min": -0.23853513598442078, + "max": 0.24350698292255402, + "mean": -2.557578045525588e-05, + "std": 0.04321583732962608, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_v.bias": { + "min": -0.06232254579663277, + "max": 0.05653427913784981, + "mean": 0.0003516775614116341, + "std": 0.014141896739602089, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.weight": { + "min": -0.437425822019577, + "max": 0.3736904561519623, + "mean": 1.4616349290008657e-05, + "std": 0.044127896428108215, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.bias": { + "min": -0.09596914798021317, + "max": 0.17601557075977325, + "mean": -0.0006586366798728704, + "std": 0.03512872755527496, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.3.g": { + "min": 0.42178472876548767, + "max": 1.06712007522583, + "mean": 0.7484290599822998, + "std": 0.04182668402791023, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.weight": { + "min": -0.26583534479141235, + "max": 0.29665902256965637, + "mean": -7.891674613347277e-05, + "std": 0.04081389307975769, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.bias": { + "min": -0.18455219268798828, + "max": 0.043140046298503876, + "mean": -0.03679502755403519, + "std": 0.0255513247102499, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.19.4.ff.2.weight": { + "min": -0.45756417512893677, + "max": 0.4861648976802826, + "mean": 4.398237433633767e-05, + "std": 0.05422103777527809, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.19.4.ff.2.bias": { + "min": -0.2858409285545349, + "max": 0.5508930087089539, + "mean": -0.0008807203266769648, + "std": 0.047792647033929825, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.0.weight": { + "min": -0.2925868332386017, + "max": 0.32265621423721313, + "mean": 6.008184755046386e-06, + "std": 0.0199727825820446, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.20.1.g": { + "min": 0.2913132309913635, + "max": 0.7585903406143188, + "mean": 0.6507112979888916, + "std": 0.05193017050623894, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_q.weight": { + "min": -0.24352194368839264, + "max": 0.26151588559150696, + "mean": -5.6967624004755635e-06, + "std": 0.03961416333913803, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_q.bias": { + "min": -0.26712363958358765, + "max": 0.19983239471912384, + "mean": -0.0008771903812885284, + "std": 0.0517287477850914, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_k.weight": { + "min": -0.2718246877193451, + "max": 0.25335949659347534, + "mean": 5.2391228564374615e-06, + "std": 0.03871086984872818, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_k.bias": { + "min": -12.94522476196289, + "max": 15.922240257263184, + "mean": 0.03318937495350838, + "std": 1.9867888689041138, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_v.weight": { + "min": -0.20649555325508118, + "max": 0.22559243440628052, + "mean": -7.25646095816046e-05, + "std": 0.040558841079473495, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_v.bias": { + "min": -0.06932304799556732, + "max": 0.06304260343313217, + "mean": 0.0001579949603183195, + "std": 0.014740646816790104, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.weight": { + "min": -0.4653640687465668, + "max": 0.3200652003288269, + "mean": 1.9525992684066296e-05, + "std": 0.04059439152479172, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.bias": { + "min": -0.06398282200098038, + "max": 0.11537733674049377, + "mean": 0.0011978133115917444, + "std": 0.02469516545534134, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.3.g": { + "min": 0.3749999403953552, + "max": 0.9300609230995178, + "mean": 0.7510109543800354, + "std": 0.040018972009420395, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.weight": { + "min": -0.27868181467056274, + "max": 0.27277180552482605, + "mean": -0.00016834630514495075, + "std": 0.041004978120326996, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.bias": { + "min": -0.19812321662902832, + "max": 0.05135354399681091, + "mean": -0.032012395560741425, + "std": 0.025048717856407166, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.20.4.ff.2.weight": { + "min": -0.65754234790802, + "max": 0.5349372029304504, + "mean": -5.049940955359489e-05, + "std": 0.052857208997011185, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.20.4.ff.2.bias": { + "min": -0.1923648864030838, + "max": 0.5813060998916626, + "mean": -0.0005128913326188922, + "std": 0.041049525141716, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.0.weight": { + "min": -0.417529821395874, + "max": 0.3719121813774109, + "mean": 6.524643140437547e-06, + "std": 0.021627992391586304, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.21.1.g": { + "min": 0.21460720896720886, + "max": 0.7452309131622314, + "mean": 0.6493626832962036, + "std": 0.054172683507204056, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_q.weight": { + "min": -0.20914840698242188, + "max": 0.19524669647216797, + "mean": 4.010984048363753e-05, + "std": 0.03945964202284813, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_q.bias": { + "min": -0.32907912135124207, + "max": 0.25925326347351074, + "mean": -0.003227418288588524, + "std": 0.05623279884457588, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_k.weight": { + "min": -0.20563212037086487, + "max": 0.25434860587120056, + "mean": 5.404069815995172e-05, + "std": 0.038562316447496414, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_k.bias": { + "min": -6.2339768409729, + "max": 6.921723365783691, + "mean": 0.04828859120607376, + "std": 1.383695363998413, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_v.weight": { + "min": -0.20957675576210022, + "max": 0.23022468388080597, + "mean": -4.741629709315021e-06, + "std": 0.04131784662604332, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_v.bias": { + "min": -0.043760623782873154, + "max": 0.03593071922659874, + "mean": -6.6086213337257504e-06, + "std": 0.012794941663742065, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.weight": { + "min": -0.3974460959434509, + "max": 0.3449029326438904, + "mean": -5.5259803048102185e-05, + "std": 0.0423947237432003, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.bias": { + "min": -0.055080167949199677, + "max": 0.06271716207265854, + "mean": 0.0003585012163966894, + "std": 0.018664730712771416, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.3.g": { + "min": 0.3508152663707733, + "max": 1.0430189371109009, + "mean": 0.789574146270752, + "std": 0.048565711826086044, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.weight": { + "min": -0.3336288034915924, + "max": 0.38612979650497437, + "mean": -0.00016904372023418546, + "std": 0.041490498930215836, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.bias": { + "min": -0.15726615488529205, + "max": 0.05897233635187149, + "mean": -0.031808022409677505, + "std": 0.02507229521870613, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.21.4.ff.2.weight": { + "min": -0.6961155533790588, + "max": 0.4685930609703064, + "mean": -8.521114068571478e-05, + "std": 0.05180642008781433, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.21.4.ff.2.bias": { + "min": -0.24746476113796234, + "max": 0.32834842801094055, + "mean": -0.00026278701261617243, + "std": 0.041423212736845016, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.0.weight": { + "min": -0.2869253158569336, + "max": 0.35028234124183655, + "mean": -2.780619524855865e-06, + "std": 0.02424117736518383, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.22.1.g": { + "min": 0.1968069076538086, + "max": 0.7775169014930725, + "mean": 0.6701230406761169, + "std": 0.058515764772892, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_q.weight": { + "min": -0.2286878526210785, + "max": 0.23117558658123016, + "mean": -2.085553205688484e-05, + "std": 0.04044000059366226, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_q.bias": { + "min": -0.2196890264749527, + "max": 0.24058501422405243, + "mean": 0.0007775035337544978, + "std": 0.05580567941069603, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_k.weight": { + "min": -0.21652470529079437, + "max": 0.2261732518672943, + "mean": -7.231749623315409e-05, + "std": 0.03937419131398201, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_k.bias": { + "min": -8.891955375671387, + "max": 9.054566383361816, + "mean": -0.0012135691940784454, + "std": 1.846129059791565, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_v.weight": { + "min": -0.2690034806728363, + "max": 0.25858405232429504, + "mean": 4.355451892479323e-05, + "std": 0.03841076418757439, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_v.bias": { + "min": -0.057884324342012405, + "max": 0.05789237469434738, + "mean": 0.0003543176280800253, + "std": 0.014708762988448143, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.weight": { + "min": -0.263511061668396, + "max": 0.288027822971344, + "mean": -6.17767364019528e-05, + "std": 0.03907754644751549, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.bias": { + "min": -0.044037725776433945, + "max": 0.037295691668987274, + "mean": -9.799870167626068e-05, + "std": 0.013339235447347164, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.3.g": { + "min": 0.339274525642395, + "max": 1.0903433561325073, + "mean": 0.8638954162597656, + "std": 0.06374805420637131, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.weight": { + "min": -0.4230613112449646, + "max": 0.41900894045829773, + "mean": 0.0003136690938845277, + "std": 0.043512988835573196, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.bias": { + "min": -0.21445079147815704, + "max": 0.17045123875141144, + "mean": -0.029427748173475266, + "std": 0.03184095025062561, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.22.4.ff.2.weight": { + "min": -0.5979012846946716, + "max": 0.559224545955658, + "mean": -0.00014804149395786226, + "std": 0.053461432456970215, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.22.4.ff.2.bias": { + "min": -0.17863567173480988, + "max": 0.3767751455307007, + "mean": 0.0013495876919478178, + "std": 0.037288032472133636, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.0.weight": { + "min": -0.3942491412162781, + "max": 0.3687455952167511, + "mean": 3.7661615351680666e-05, + "std": 0.028617454692721367, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.23.1.g": { + "min": 0.2907008230686188, + "max": 0.8258129358291626, + "mean": 0.7054593563079834, + "std": 0.06773429363965988, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_q.weight": { + "min": -0.9265665411949158, + "max": 1.0269814729690552, + "mean": -2.7912426958209835e-05, + "std": 0.04764382541179657, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_q.bias": { + "min": -0.8779393434524536, + "max": 0.8145599365234375, + "mean": -0.0002924790605902672, + "std": 0.09544122219085693, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_k.weight": { + "min": -0.27007606625556946, + "max": 0.24068056046962738, + "mean": -2.244842835352756e-05, + "std": 0.038949914276599884, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_k.bias": { + "min": -23.705463409423828, + "max": 22.81535530090332, + "mean": -0.09178592264652252, + "std": 4.064526081085205, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_v.weight": { + "min": -0.2275296449661255, + "max": 0.2455320507287979, + "mean": -2.5536401153658517e-05, + "std": 0.03864150494337082, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_v.bias": { + "min": -0.06007588282227516, + "max": 0.045354753732681274, + "mean": -0.00013596308417618275, + "std": 0.014683394692838192, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.weight": { + "min": -0.33782336115837097, + "max": 0.3746013939380646, + "mean": 7.420149813697208e-06, + "std": 0.04082043468952179, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.bias": { + "min": -0.046125710010528564, + "max": 0.19506430625915527, + "mean": 0.0002738517359830439, + "std": 0.013541821390390396, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.3.g": { + "min": 0.373764306306839, + "max": 1.1280238628387451, + "mean": 0.8901123404502869, + "std": 0.06384868174791336, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.weight": { + "min": -0.44741326570510864, + "max": 0.5422499775886536, + "mean": 2.5218108930857852e-05, + "std": 0.045580700039863586, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.bias": { + "min": -0.22342835366725922, + "max": 0.08723597973585129, + "mean": -0.03199537843465805, + "std": 0.03770318627357483, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.23.4.ff.2.weight": { + "min": -0.7254156470298767, + "max": 0.6879446506500244, + "mean": 3.628328340710141e-05, + "std": 0.05179440602660179, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.23.4.ff.2.bias": { + "min": -0.174102783203125, + "max": 0.2178839147090912, + "mean": 3.535003634169698e-05, + "std": 0.03175075352191925, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.0.weight": { + "min": -0.33916032314300537, + "max": 0.37271323800086975, + "mean": 4.308380448492244e-05, + "std": 0.034135378897190094, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.24.1.g": { + "min": 0.3176645338535309, + "max": 1.2846463918685913, + "mean": 0.6014195084571838, + "std": 0.08323279023170471, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_q.weight": { + "min": -0.2829808294773102, + "max": 0.26017650961875916, + "mean": -3.064439852096257e-06, + "std": 0.035980723798274994, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_q.bias": { + "min": -0.23540745675563812, + "max": 0.20547473430633545, + "mean": 0.0002399118966422975, + "std": 0.056001532822847366, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_k.weight": { + "min": -0.43518391251564026, + "max": 0.32444700598716736, + "mean": 2.4227547328337096e-05, + "std": 0.03412417694926262, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_k.bias": { + "min": -5.537700176239014, + "max": 7.30228853225708, + "mean": -0.007349951192736626, + "std": 0.6983441114425659, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_v.weight": { + "min": -0.34386035799980164, + "max": 0.3621582090854645, + "mean": 0.00010323335300199687, + "std": 0.04783642664551735, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_v.bias": { + "min": -0.07365774363279343, + "max": 0.060269735753536224, + "mean": 0.0009362755226902664, + "std": 0.014931198209524155, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.weight": { + "min": -0.2561565041542053, + "max": 0.2865042984485626, + "mean": 4.973959221388213e-06, + "std": 0.04156460985541344, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.bias": { + "min": -0.055231235921382904, + "max": 0.06271004676818848, + "mean": 0.00012724015687126666, + "std": 0.0071450709365308285, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.3.g": { + "min": 0.49412763118743896, + "max": 1.2182179689407349, + "mean": 1.0133787393569946, + "std": 0.11725164949893951, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.weight": { + "min": -1.0939558744430542, + "max": 1.0474863052368164, + "mean": -4.884616646450013e-05, + "std": 0.052417904138565063, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.bias": { + "min": -0.22328178584575653, + "max": 0.172784686088562, + "mean": -0.02721056528389454, + "std": 0.0362662672996521, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.24.4.ff.2.weight": { + "min": -0.8832080960273743, + "max": 0.9217195510864258, + "mean": -0.00014604278840124607, + "std": 0.05329865962266922, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.24.4.ff.2.bias": { + "min": -0.1707809567451477, + "max": 0.3790228068828583, + "mean": 0.003364440519362688, + "std": 0.03984135016798973, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.0.weight": { + "min": -0.7773804068565369, + "max": 0.7221406698226929, + "mean": 1.8065225958707742e-05, + "std": 0.04615423083305359, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.25.1.g": { + "min": 0.33866187930107117, + "max": 1.425328254699707, + "mean": 0.9481796622276306, + "std": 0.20640140771865845, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_q.weight": { + "min": -1.7458044290542603, + "max": 1.704500436782837, + "mean": 0.00022708992764819413, + "std": 0.15870554745197296, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_q.bias": { + "min": -1.19757080078125, + "max": 1.0991984605789185, + "mean": -0.009535851888358593, + "std": 0.2035919725894928, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_k.weight": { + "min": -0.4207988381385803, + "max": 0.4279989004135132, + "mean": 6.386132736224681e-05, + "std": 0.04802023991942406, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_k.bias": { + "min": -19.71625328063965, + "max": 19.51169776916504, + "mean": -0.24800625443458557, + "std": 4.769559860229492, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_v.weight": { + "min": -0.3236338496208191, + "max": 0.438272625207901, + "mean": -1.1853729120048229e-05, + "std": 0.04616710543632507, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_v.bias": { + "min": -0.03371698036789894, + "max": 0.03678824380040169, + "mean": 0.0006397695397026837, + "std": 0.0129077835008502, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.weight": { + "min": -0.7031863331794739, + "max": 0.6687424182891846, + "mean": 4.257483669789508e-05, + "std": 0.057892125099897385, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.bias": { + "min": -0.0722241997718811, + "max": 0.0676589161157608, + "mean": -0.0001341316383332014, + "std": 0.012878631241619587, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.3.g": { + "min": 0.38035547733306885, + "max": 1.3902052640914917, + "mean": 1.066498041152954, + "std": 0.21949008107185364, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.weight": { + "min": -0.6164002418518066, + "max": 0.7182905673980713, + "mean": 0.00011321296915411949, + "std": 0.05802781134843826, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.bias": { + "min": -0.2184617668390274, + "max": 0.22462666034698486, + "mean": 0.006169781554490328, + "std": 0.04965030029416084, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.25.4.ff.2.weight": { + "min": -0.6297575831413269, + "max": 0.8895801901817322, + "mean": 1.2445932043192443e-05, + "std": 0.023545311763882637, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.25.4.ff.2.bias": { + "min": -0.506031334400177, + "max": 0.47297078371047974, + "mean": -0.0030135007109493017, + "std": 0.0691458210349083, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.norm_out.g": { + "min": 0.5383259057998657, + "max": 1.1772801876068115, + "mean": 0.7824772596359253, + "std": 0.09824033081531525, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.proj_out.weight": { + "min": -0.26664498448371887, + "max": 0.2126948982477188, + "mean": -0.00022273289505392313, + "std": 0.05400582030415535, + "sparsity": 0.0, + "shape": [ + 100, + 1024 + ] + }, + "transformer.proj_out.bias": { + "min": -0.23798410594463348, + "max": 0.014864158816635609, + "mean": -0.04389958456158638, + "std": 0.03423725813627243, + "sparsity": 0.0, + "shape": [ + 100 + ] + } + } +} \ No newline at end of file